move almost all hashing to SHA256, consolidate into hashutil.py

The only SHA-1 hash that remains is used in the permutation of nodeids,
where we need to decide if we care about performance or long-term security.
I suspect that we could use a much weaker (and faster) hash for
this purpose. In the long run, we'll be doing thousands of such hashes
for each file uploaded or downloaded (one per known peer).
Brian Warner 2007-06-07 21:47:21 -07:00
parent cabba59fe7
commit c049941529
11 changed files with 76 additions and 45 deletions
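For context, the tagged-hash convention that hashutil.py consolidates, and the per-peer nodeid permutation the commit message worries about, can be approximated as below. This is a minimal standalone sketch using the standard hashlib module instead of the in-tree allmydata.Crypto.Hash.SHA256; the tag strings and netstring framing match what the diff shows, but the helper names (tagged_hash_sha256, permute_nodeids) and the exact permutation inputs are illustrative assumptions, not code from this commit.

import hashlib

def netstring(s):
    # length-prefixed framing, e.g. b"5:hello,"
    return b"%d:%s," % (len(s), s)

def tagged_hash_sha256(tag, data):
    # SHA-256 over netstring(tag) + data; digests are 32 bytes, which is why
    # the length asserts in the diff change from 20 to 32.
    h = hashlib.sha256(netstring(tag))
    h.update(data)
    return h.digest()

def permute_nodeids(nodeids, storage_index):
    # One hash per known peer per file: this is the remaining SHA-1 use the
    # message refers to, where a weaker/faster hash might be acceptable.
    # (The exact hash inputs here are a simplification.)
    return sorted(nodeids,
                  key=lambda n: hashlib.sha1(n + storage_index).digest())

print(len(tagged_hash_sha256(b"allmydata_fileid_v1", b"example plaintext")))  # 32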

View File

@@ -1,15 +1,14 @@
import os, random, sha
import os, random
from zope.interface import implements
from twisted.python import log
from twisted.internet import defer
from twisted.application import service
from allmydata.util import idlib, mathutil, bencode
from allmydata.util import idlib, mathutil, bencode, hashutil
from allmydata.util.assertutil import _assert
from allmydata import codec, hashtree
from allmydata.Crypto.Cipher import AES
from allmydata.Crypto.Hash import SHA256
from allmydata.uri import unpack_uri
from allmydata.interfaces import IDownloadTarget, IDownloader
@@ -32,8 +31,8 @@ class Output:
self.downloadable = downloadable
self._decryptor = AES.new(key=key, mode=AES.MODE_CTR,
counterstart="\x00"*16)
self._verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
self._fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
self._verifierid_hasher = hashutil.verifierid_hasher()
self._fileid_hasher = hashutil.fileid_hasher()
self.length = 0
self._segment_number = 0
self._plaintext_hash_tree = None
@@ -54,7 +53,7 @@ class Output:
# 2*segment_size.
self._verifierid_hasher.update(crypttext)
if self._crypttext_hash_tree:
ch = SHA256.new(netstring("allmydata_crypttext_segment_v1"))
ch = hashutil.crypttext_segment_hasher()
ch.update(crypttext)
crypttext_leaves = {self._segment_number: ch.digest()}
self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
@@ -66,7 +65,7 @@ class Output:
self._fileid_hasher.update(plaintext)
if self._plaintext_hash_tree:
ph = SHA256.new(netstring("allmydata_plaintext_segment_v1"))
ph = hashutil.plaintext_segment_hasher()
ph.update(plaintext)
plaintext_leaves = {self._segment_number: ph.digest()}
self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves)
@@ -140,7 +139,7 @@ class ValidatedBucket:
#log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d" %
# (self.sharenum, blocknum, len(blockdata)))
blockhash = hashtree.block_hash(blockdata)
blockhash = hashutil.block_hash(blockdata)
# we always validate the blockhash
bh = dict(enumerate(blockhashes))
# replace blockhash root with validated value
@@ -350,7 +349,7 @@ class FileDownloader:
# comes back, and compare it against the version in our URI. If they
# don't match, ignore their data and try someone else.
def _validate(proposal, bucket):
h = hashtree.thingA_hash(proposal)
h = hashutil.thingA_hash(proposal)
if h != self._thingA_hash:
self._fetch_failures["thingA"] += 1
msg = ("The copy of thingA we received from %s was bad" %
@@ -392,7 +391,7 @@ class FileDownloader:
verifierid = d['verifierid']
assert isinstance(verifierid, str)
assert len(verifierid) == 20
assert len(verifierid) == 32
self._verifierid = verifierid
self._fileid = d['fileid']
self._roothash = d['share_root_hash']

View File

@@ -3,10 +3,9 @@
from zope.interface import implements
from twisted.internet import defer
from twisted.python import log
from allmydata.hashtree import HashTree, block_hash, thingA_hash
from allmydata.hashtree import HashTree
from allmydata.Crypto.Cipher import AES
from allmydata.Crypto.Hash import SHA256
from allmydata.util import mathutil, bencode
from allmydata.util import mathutil, bencode, hashutil
from allmydata.util.assertutil import _assert
from allmydata.codec import CRSEncoder
from allmydata.interfaces import IEncoder
@@ -224,8 +223,8 @@ class Encoder(object):
# of additional shares which can be substituted if the primary ones
# are unavailable
plaintext_hasher = SHA256.new(netstring("allmydata_plaintext_segment_v1"))
crypttext_hasher = SHA256.new(netstring("allmydata_crypttext_segment_v1"))
plaintext_hasher = hashutil.plaintext_segment_hasher()
crypttext_hasher = hashutil.crypttext_segment_hasher()
# memory footprint: we only hold a tiny piece of the plaintext at any
given time. We build up a segment's worth of crypttext, then hand
@@ -258,8 +257,8 @@ class Encoder(object):
codec = self._tail_codec
input_piece_size = codec.get_block_size()
plaintext_hasher = SHA256.new(netstring("allmydata_plaintext_segment_v1"))
crypttext_hasher = SHA256.new(netstring("allmydata_crypttext_segment_v1"))
plaintext_hasher = hashutil.plaintext_segment_hasher()
crypttext_hasher = hashutil.crypttext_segment_hasher()
for i in range(self.required_shares):
input_piece = self.infile.read(input_piece_size)
@@ -297,7 +296,7 @@ class Encoder(object):
shareid = shareids[i]
d = self.send_subshare(shareid, segnum, subshare)
dl.append(d)
subshare_hash = block_hash(subshare)
subshare_hash = hashutil.block_hash(subshare)
self.subshare_hashes[shareid].append(subshare_hash)
dl = self._gather_responses(dl)
def _logit(res):
@@ -437,7 +436,7 @@ class Encoder(object):
def send_thingA_to_all_shareholders(self):
log.msg("%s: sending thingA" % self)
thingA = bencode.bencode(self.thingA_data)
self.thingA_hash = thingA_hash(thingA)
self.thingA_hash = hashutil.thingA_hash(thingA)
dl = []
for shareid in self.landlords.keys():
dl.append(self.send_thingA(shareid, thingA))

View File

@@ -446,9 +446,3 @@ class IncompleteHashTree(CompleteBinaryTreeMixin, list):
for i in added:
self[i] = None
raise
def block_hash(data):
return tagged_hash("encoded subshare", data)
def thingA_hash(data):
return tagged_hash("thingA", data)

View File

@@ -11,7 +11,6 @@ Hash = StringConstraint(maxLength=HASH_SIZE,
Nodeid = StringConstraint(maxLength=20,
minLength=20) # binary format 20-byte SHA1 hash
FURL = StringConstraint(1000)
Verifierid = StringConstraint(20)
StorageIndex = StringConstraint(32)
URI = StringConstraint(300) # kind of arbitrary
MAX_BUCKETS = 200 # per peer
@@ -121,12 +120,12 @@ RIMutableDirectoryNode_ = Any() # TODO: how can we avoid this?
class RIMutableDirectoryNode(RemoteInterface):
def list():
return ListOf( TupleOf(str, # name, relative to directory
ChoiceOf(RIMutableDirectoryNode_, Verifierid)),
ChoiceOf(RIMutableDirectoryNode_, URI)),
maxLength=100,
)
def get(name=str):
return ChoiceOf(RIMutableDirectoryNode_, Verifierid)
return ChoiceOf(RIMutableDirectoryNode_, URI)
def add_directory(name=str):
return RIMutableDirectoryNode_

View File

@@ -7,7 +7,6 @@ from allmydata import encode, download, hashtree
from allmydata.util import hashutil
from allmydata.uri import pack_uri
from allmydata.Crypto.Cipher import AES
import sha
from cStringIO import StringIO
def netstring(s):
@@ -300,11 +299,11 @@ class Roundtrip(unittest.TestCase):
peer = FakeBucketWriter(mode)
shareholders[shnum] = peer
e.set_shareholders(shareholders)
fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
fileid_hasher = hashutil.fileid_hasher()
fileid_hasher.update(data)
cryptor = AES.new(key=nonkey, mode=AES.MODE_CTR,
counterstart="\x00"*16)
verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
verifierid_hasher = hashutil.verifierid_hasher()
verifierid_hasher.update(cryptor.encrypt(data))
e.set_thingA_data({'verifierid': verifierid_hasher.digest(),
@@ -322,7 +321,7 @@ class Roundtrip(unittest.TestCase):
if "corrupt_key" in recover_mode:
key = flip_bit(key)
URI = pack_uri(storage_index="S" * 20,
URI = pack_uri(storage_index="S" * 32,
key=key,
thingA_hash=thingA_hash,
needed_shares=e.required_shares,

View File

@@ -216,7 +216,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
# change the storage index, which means we'll be asking about the
# wrong file, so nobody will have any shares
d = uri.unpack_uri(gooduri)
assert len(d['storage_index']) == 20
assert len(d['storage_index']) == 32
d['storage_index'] = self.flip_bit(d['storage_index'])
return uri.pack_uri(**d)

View File

@@ -26,7 +26,7 @@ class GoodServer(unittest.TestCase):
self.failUnless(uri.startswith("URI:"))
d = unpack_uri(uri)
self.failUnless(isinstance(d['storage_index'], str))
self.failUnlessEqual(len(d['storage_index']), 20)
self.failUnlessEqual(len(d['storage_index']), 32)
self.failUnless(isinstance(d['key'], str))
self.failUnlessEqual(len(d['key']), 16)

View File

@@ -4,14 +4,14 @@ from twisted.internet import defer
from twisted.application import service
from foolscap import Referenceable
from allmydata.util import idlib
from allmydata.util import idlib, hashutil
from allmydata import encode
from allmydata.uri import pack_uri
from allmydata.interfaces import IUploadable, IUploader
from allmydata.Crypto.Cipher import AES
from cStringIO import StringIO
import collections, random, sha
import collections, random
class NotEnoughPeersError(Exception):
pass
@@ -75,10 +75,10 @@ class FileUploader:
def set_id_strings(self, verifierid, fileid):
assert isinstance(verifierid, str)
assert len(verifierid) == 20
assert len(verifierid) == 32
self._verifierid = verifierid
assert isinstance(fileid, str)
assert len(fileid) == 20
assert len(fileid) == 32
self._fileid = fileid
def set_encryption_key(self, key):
@@ -298,8 +298,8 @@ class Uploader(service.MultiService):
def compute_id_strings(self, f):
# return a list of (fileid, encryptionkey, verifierid)
fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
enckey_hasher = sha.new(netstring("allmydata_encryption_key_v1"))
fileid_hasher = hashutil.fileid_hasher()
enckey_hasher = hashutil.key_hasher()
f.seek(0)
BLOCKSIZE = 64*1024
while True:
@@ -313,7 +313,7 @@ class Uploader(service.MultiService):
# now make a second pass to determine the verifierid. It would be
# nice to make this involve fewer passes.
verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
verifierid_hasher = hashutil.verifierid_hasher()
key = enckey[:16]
cryptor = AES.new(key=key, mode=AES.MODE_CTR,
counterstart="\x00"*16)

View File

@@ -9,7 +9,7 @@ def pack_uri(storage_index, key, thingA_hash,
needed_shares, total_shares, size):
# applications should pass keyword parameters into this
assert isinstance(storage_index, str)
assert len(storage_index) == 20 # sha1 hash. TODO: sha256
assert len(storage_index) == 32 # sha256 hash
assert isinstance(thingA_hash, str)
assert len(thingA_hash) == 32 # sha256 hash

View File

@@ -16,3 +16,43 @@ def tagged_pair_hash(tag, val1, val2):
s.update(netstring(val2))
return s.digest()
# specific hash tags that we use
def tagged_hasher(tag):
return SHA256.new(netstring(tag))
def block_hash(data):
return tagged_hash("allmydata_encoded_subshare_v1", data)
def block_hasher():
return tagged_hasher("allmydata_encoded_subshare_v1")
def thingA_hash(data):
return tagged_hash("thingA", data)
def thingA_hasher():
return tagged_hasher("thingA")
def fileid_hash(data):
return tagged_hash("allmydata_fileid_v1", data)
def fileid_hasher():
return tagged_hasher("allmydata_fileid_v1")
def verifierid_hash(data):
return tagged_hash("allmydata_verifierid_v1", data)
def verifierid_hasher():
return tagged_hasher("allmydata_verifierid_v1")
def crypttext_segment_hash(data):
return tagged_hash("allmydata_crypttext_segment_v1", data)
def crypttext_segment_hasher():
return tagged_hasher("allmydata_crypttext_segment_v1")
def plaintext_segment_hash(data):
return tagged_hash("allmydata_plaintext_segment_v1", data)
def plaintext_segment_hasher():
return tagged_hasher("allmydata_plaintext_segment_v1")
def key_hash(data):
return tagged_hash("allmydata_encryption_key_v1", data)
def key_hasher():
return tagged_hasher("allmydata_encryption_key_v1")

View File

@@ -1,10 +1,11 @@
import os, shutil, sha
import os, shutil
from zope.interface import implements
from twisted.internet import defer
from allmydata.util import bencode
from allmydata.util.idlib import b2a
from allmydata.Crypto.Cipher import AES
from allmydata.Crypto.Hash import SHA256
from allmydata.filetree.nodemaker import NodeMaker
from allmydata.filetree.interfaces import INode
from allmydata.filetree.file import CHKFileNode
@@ -382,9 +383,9 @@ def make_aes_key():
def make_rsa_key():
raise NotImplementedError
def hash_sha(data):
return sha.new(data).digest()
return SHA256.new(data).digest()
def hash_sha_to_key(data):
return sha.new(data).digest()[:AES_KEY_LENGTH]
return SHA256.new(data).digest()[:AES_KEY_LENGTH]
def aes_encrypt(key, plaintext):
assert isinstance(key, str)
assert len(key) == AES_KEY_LENGTH