use real encryption, generate/store/verify verifierid and fileid

This commit is contained in:
Brian Warner 2007-04-25 17:53:10 -07:00
parent adc402c481
commit 4b2298937b
7 changed files with 127 additions and 45 deletions

@ -25,8 +25,8 @@ class Output:
self.downloadable = downloadable
self._decryptor = AES.new(key=key, mode=AES.MODE_CTR,
counterstart="\x00"*16)
self._verifierid_hasher = sha.new(netstring("allmydata_v1_verifierid"))
self._fileid_hasher = sha.new(netstring("allmydata_v1_fileid"))
self._verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
self._fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
self.length = 0
def open(self):
@ -208,14 +208,17 @@ class SegmentDownloader:
del self.parent._share_buckets[shnum]
class FileDownloader:
check_verifierid = True
check_fileid = True
def __init__(self, client, uri, downloadable):
self._client = client
self._downloadable = downloadable
(codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size) = unpack_uri(uri)
(codec_name, codec_params, tail_codec_params, verifierid, fileid, key, roothash, needed_shares, total_shares, size, segment_size) = unpack_uri(uri)
assert isinstance(verifierid, str)
assert len(verifierid) == 20
self._verifierid = verifierid
self._fileid = fileid
self._roothash = roothash
self._codec = codec.get_decoder_by_name(codec_name)
@ -230,7 +233,6 @@ class FileDownloader:
self._size = size
self._num_needed_shares = self._codec.get_needed_shares()
key = "\x00" * 16
self._output = Output(downloadable, key)
self._share_hashtree = hashtree.IncompleteHashTree(total_shares)
@ -349,10 +351,18 @@ class FileDownloader:
def _done(self, res):
    """Close the output and check what was downloaded before finishing.

    The verifierid and fileid reported by the output are logged and then
    compared with the values stored on this downloader. Either comparison
    can be disabled through the check_verifierid / check_fileid attributes.
    The output length is always compared with the expected size.

    res is accepted but not used. Returns whatever the output's finish()
    returns; a failed comparison is reported through _assert.
    """
    output = self._output
    output.close()
    log.msg("computed VERIFIERID: %s" % idlib.b2a(output.verifierid))
    log.msg("computed FILEID: %s" % idlib.b2a(output.fileid))

    if self.check_verifierid:
        verifierid_ok = (self._verifierid == output.verifierid)
        _assert(verifierid_ok,
                "bad verifierid: computed=%s, expected=%s" %
                (idlib.b2a(output.verifierid),
                 idlib.b2a(self._verifierid)))

    if self.check_fileid:
        fileid_ok = (self._fileid == output.fileid)
        _assert(fileid_ok,
                "bad fileid: computed=%s, expected=%s" %
                (idlib.b2a(output.fileid),
                 idlib.b2a(self._fileid)))

    # the length check is unconditional, unlike the two id checks above
    length_ok = (output.length == self._size)
    _assert(length_ok, got=output.length, expected=self._size)
    return output.finish()

@ -79,8 +79,11 @@ class Encoder(object):
self.NEEDED_SHARES = k
self.TOTAL_SHARES = n
def setup(self, infile):
def setup(self, infile, encryption_key):
self.infile = infile
assert isinstance(encryption_key, str)
assert len(encryption_key) == 16 # AES-128
self.key = encryption_key
infile.seek(0, 2)
self.file_size = infile.tell()
infile.seek(0, 0)
@ -158,7 +161,6 @@ class Encoder(object):
return d
def setup_encryption(self):
self.key = "\x00"*16
self.cryptor = AES.new(key=self.key, mode=AES.MODE_CTR,
counterstart="\x00"*16)
self.segment_num = 0

@ -115,7 +115,8 @@ class Encode(unittest.TestCase):
# force use of multiple segments
options = {"max_segment_size": max_segment_size}
e = encode.Encoder(options)
e.setup(StringIO(data))
nonkey = "\x00" * 16
e.setup(StringIO(data), nonkey)
assert e.num_shares == NUM_SHARES # else we'll be completely confused
e.setup_codec() # need to rebuild the codec for that change
assert (NUM_SEGMENTS-1)*e.segment_size < len(data) <= NUM_SEGMENTS*e.segment_size
@ -222,7 +223,8 @@ class Roundtrip(unittest.TestCase):
options = {"max_segment_size": max_segment_size,
"needed_and_total_shares": k_and_n}
e = encode.Encoder(options)
e.setup(StringIO(data))
nonkey = "\x00" * 16
e.setup(StringIO(data), nonkey)
assert e.num_shares == NUM_SHARES # else we'll be completely confused
e.setup_codec() # need to rebuild the codec for that change
@ -238,18 +240,22 @@ class Roundtrip(unittest.TestCase):
e.set_shareholders(shareholders)
d = e.start()
def _uploaded(roothash):
URI = pack_uri(e._codec.get_encoder_type(),
e._codec.get_serialized_params(),
e._tail_codec.get_serialized_params(),
"V" * 20,
roothash,
e.required_shares,
e.num_shares,
e.file_size,
e.segment_size)
URI = pack_uri(codec_name=e._codec.get_encoder_type(),
codec_params=e._codec.get_serialized_params(),
tail_codec_params=e._tail_codec.get_serialized_params(),
verifierid="V" * 20,
fileid="F" * 20,
key=nonkey,
roothash=roothash,
needed_shares=e.required_shares,
total_shares=e.num_shares,
size=e.file_size,
segment_size=e.segment_size)
client = None
target = download.Data()
fd = download.FileDownloader(client, URI, target)
fd.check_verifierid = False
fd.check_fileid = False
for shnum in range(AVAILABLE_SHARES):
bucket = all_shareholders[shnum]
fd.add_share_bucket(shnum, bucket)

@ -194,7 +194,8 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
d1 = self.downloader.download_to_data(baduri)
def _baduri_should_fail(res):
self.failUnless(isinstance(res, Failure))
self.failUnless(res.check(download.NotEnoughPeersError))
self.failUnless(res.check(download.NotEnoughPeersError),
"expected NotEnoughPeersError, got %s" % res)
# TODO: files that have zero peers should get a special kind
# of NotEnoughPeersError, which can be used to suggest that
# the URI might be wrong or that they've never uploaded the
@ -209,11 +210,19 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
return good[:-1] + chr(ord(good[-1]) ^ 0x01)
def mangle_uri(self, gooduri):
    """Return a copy of gooduri whose verifierid has been altered.

    Changing the verifierid means we'll be asking about the wrong file, so
    nobody will have any shares. Every other field is passed through
    unchanged.
    """
    pieces = list(uri.unpack_uri(gooduri))
    # unpack_uri returns (codec_name, codec_params, tail_codec_params,
    # verifierid, fileid, key, ...), so [3] is the verifierid. [4] is the
    # fileid and must not be touched here.
    assert len(pieces[3]) == 20
    pieces[3] = self.flip_bit(pieces[3])
    return uri.pack_uri(*pieces)
# TODO: add a test which mangles the fileid instead, and should fail in
# the post-download phase when the file's integrity check fails. Do the
# same thing for the key, which should cause the download to fail the
# post-download verifierid check.
def test_vdrive(self):
self.basedir = "test_system/SystemTest/test_vdrive"
self.data = DATA = "Some data to publish to the virtual drive\n"

@ -24,9 +24,13 @@ class GoodServer(unittest.TestCase):
def _check(self, uri):
    """Check that uri is a URI string whose id and key fields look sane.

    The uri must be a str starting with "URI:". After unpacking, the
    verifierid and fileid must each be a str of length 20, the key a str
    of length 16, and codec_params a str.
    """
    self.failUnless(isinstance(uri, str))
    self.failUnless(uri.startswith("URI:"))
    # unpack_uri returns eleven fields; the names must match that count or
    # the tuple assignment itself raises ValueError.
    (codec_name, codec_params, tail_codec_params,
     verifierid, fileid, key,
     roothash, needed_shares, total_shares,
     size, segment_size) = unpack_uri(uri)
    self.failUnless(isinstance(verifierid, str))
    self.failUnlessEqual(len(verifierid), 20)
    self.failUnless(isinstance(fileid, str))
    self.failUnlessEqual(len(fileid), 20)
    self.failUnless(isinstance(key, str))
    self.failUnlessEqual(len(key), 16)
    self.failUnless(isinstance(codec_params, str))
def testData(self):

@ -8,6 +8,7 @@ from allmydata.util import idlib
from allmydata import encode
from allmydata.uri import pack_uri
from allmydata.interfaces import IUploadable, IUploader
from allmydata.Crypto.Cipher import AES
from cStringIO import StringIO
import collections, random, sha
@ -72,10 +73,18 @@ class FileUploader:
self._size = filehandle.tell()
filehandle.seek(0)
def set_verifierid(self, vid):
    """Store vid as this upload's verifierid.

    vid must be a str of length 20; anything else trips an assertion.
    """
    assert isinstance(vid, str) and len(vid) == 20
    self._verifierid = vid
def set_id_strings(self, verifierid, fileid):
    """Store the verifierid and fileid for this upload.

    Each must be a str of length 20; anything else trips an assertion.
    The verifierid is checked and stored before the fileid is looked at.
    """
    assert isinstance(verifierid, str) and len(verifierid) == 20
    self._verifierid = verifierid

    assert isinstance(fileid, str) and len(fileid) == 20
    self._fileid = fileid
def set_encryption_key(self, key):
    """Store the encryption key to use for this upload.

    key must be a str of length 16 (AES-128); anything else trips an
    assertion.
    """
    assert isinstance(key, str) and len(key) == 16  # AES-128
    self._encryption_key = key
def start(self):
"""Start uploading the file.
@ -91,7 +100,7 @@ class FileUploader:
# create the encoder, so we can know how large the shares will be
self._encoder = encode.Encoder(self._options)
self._encoder.setup(self._filehandle)
self._encoder.setup(self._filehandle, self._encryption_key)
share_size = self._encoder.get_share_size()
block_size = self._encoder.get_block_size()
@ -234,10 +243,17 @@ class FileUploader:
codec_type = self._encoder._codec.get_encoder_type()
codec_params = self._encoder._codec.get_serialized_params()
tail_codec_params = self._encoder._tail_codec.get_serialized_params()
return pack_uri(codec_type, codec_params, tail_codec_params,
self._verifierid,
roothash, self.needed_shares, self.total_shares,
self._size, self._encoder.segment_size)
return pack_uri(codec_name=codec_type,
codec_params=codec_params,
tail_codec_params=tail_codec_params,
verifierid=self._verifierid,
fileid=self._fileid,
key=self._encryption_key,
roothash=roothash,
needed_shares=self.needed_shares,
total_shares=self.total_shares,
size=self._size,
segment_size=self._encoder.segment_size)
def netstring(s):
@ -282,14 +298,39 @@ class Uploader(service.MultiService):
desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many.
total_shares = 100 # Total number of shares created by encoding. If everybody has room then this is how many we will upload.
def _compute_verifierid(self, f):
hasher = sha.new(netstring("allmydata_v1_verifierid"))
def compute_id_strings(self, f):
    """Derive the fileid, encryption key, and verifierid for file f.

    f must support seek() and read(). It is read twice, in 64KiB blocks:

    * first pass: the plaintext is fed to two separately-tagged hashes,
      giving the fileid and the material the encryption key is cut from
      (its first 16 bytes)
    * second pass: the plaintext is encrypted with that key (AES in CTR
      mode, counter starting at zero) and the ciphertext is hashed to
      give the verifierid

    Returns a (fileid, key, verifierid) tuple and leaves the file pointer
    at the beginning of f.
    """
    BLOCKSIZE = 64*1024

    fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
    enckey_hasher = sha.new(netstring("allmydata_encryption_key_v1"))
    f.seek(0)
    while True:
        data = f.read(BLOCKSIZE)
        if not data:
            break
        fileid_hasher.update(data)
        enckey_hasher.update(data)
    fileid = fileid_hasher.digest()
    enckey = enckey_hasher.digest()

    # now make a second pass to determine the verifierid. It would be
    # nice to make this involve fewer passes.
    verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
    key = enckey[:16]
    cryptor = AES.new(key=key, mode=AES.MODE_CTR,
                      counterstart="\x00"*16)
    f.seek(0)
    while True:
        data = f.read(BLOCKSIZE)
        if not data:
            break
        verifierid_hasher.update(cryptor.encrypt(data))
    verifierid = verifierid_hasher.digest()

    # and leave the file pointer at the beginning
    f.seek(0)
    return fileid, key, verifierid
def upload(self, f, options={}):
# this returns the URI
@ -300,7 +341,9 @@ class Uploader(service.MultiService):
u = self.uploader_class(self.parent, options)
u.set_filehandle(fh)
u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
u.set_verifierid(self._compute_verifierid(fh))
fileid, key, verifierid = self.compute_id_strings(fh)
u.set_encryption_key(key)
u.set_id_strings(verifierid, fileid)
d = u.start()
def _done(res):
f.close_filehandle(fh)

@ -5,7 +5,9 @@ from allmydata.util import idlib
# enough information to retrieve and validate the contents. It shall be
# expressed in a limited character set (namely [TODO]).
def pack_uri(codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size):
def pack_uri(codec_name, codec_params, tail_codec_params,
verifierid, fileid, key,
roothash, needed_shares, total_shares, size, segment_size):
assert isinstance(codec_name, str)
assert len(codec_name) < 10
assert ":" not in codec_name
@ -15,18 +17,24 @@ def pack_uri(codec_name, codec_params, tail_codec_params, verifierid, roothash,
assert ":" not in tail_codec_params
assert isinstance(verifierid, str)
assert len(verifierid) == 20 # sha1 hash
return "URI:%s:%s:%s:%s:%s:%s:%s:%s:%s" % (codec_name, codec_params, tail_codec_params, idlib.b2a(verifierid), idlib.b2a(roothash), needed_shares, total_shares, size, segment_size)
assert isinstance(fileid, str)
assert len(fileid) == 20 # sha1 hash
assert isinstance(key, str)
assert len(key) == 16 # AES-128
return "URI:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s" % (codec_name, codec_params, tail_codec_params, idlib.b2a(verifierid), idlib.b2a(fileid), idlib.b2a(key), idlib.b2a(roothash), needed_shares, total_shares, size, segment_size)
def unpack_uri(uri):
    """Split a URI string of the form built by pack_uri into its fields.

    The URI must start with "URI:" and consist of exactly twelve
    colon-separated parts (the "URI" header plus eleven fields); any other
    count makes the tuple assignment raise ValueError. The verifierid,
    fileid, key, and roothash are decoded with idlib.a2b, and the share
    counts and sizes are converted to int.

    Returns the tuple (codec_name, codec_params, tail_codec_params,
    verifierid, fileid, key, roothash, needed_shares, total_shares, size,
    segment_size).
    """
    assert uri.startswith("URI:")
    (header, codec_name, codec_params, tail_codec_params,
     verifierid_s, fileid_s, key_s, roothash_s,
     needed_shares_s, total_shares_s,
     size_s, segment_size_s) = uri.split(":")
    verifierid = idlib.a2b(verifierid_s)
    fileid = idlib.a2b(fileid_s)
    key = idlib.a2b(key_s)
    roothash = idlib.a2b(roothash_s)
    needed_shares = int(needed_shares_s)
    total_shares = int(total_shares_s)
    size = int(size_s)
    segment_size = int(segment_size_s)
    return (codec_name, codec_params, tail_codec_params,
            verifierid, fileid, key, roothash,
            needed_shares, total_shares, size, segment_size)