2007-01-05 01:06:20 +00:00
|
|
|
# -*- test-case-name: allmydata.test.test_encode -*-
|
2006-12-14 03:32:35 +00:00
|
|
|
|
|
|
|
from twisted.internet import defer
|
2006-12-14 11:31:17 +00:00
|
|
|
from allmydata.chunk import HashTree, roundup_pow2
|
2006-12-14 03:32:35 +00:00
|
|
|
from Crypto.Cipher import AES
|
|
|
|
import hashlib
import sha
|
2006-12-29 20:50:53 +00:00
|
|
|
from allmydata.util import mathutil
|
2007-01-05 01:06:20 +00:00
|
|
|
from allmydata.util.assertutil import precondition
|
2007-01-05 06:51:35 +00:00
|
|
|
from allmydata.encode import ReplicatingEncoder, PyRSEncoder
|
2006-12-14 03:32:35 +00:00
|
|
|
|
|
|
|
def hash(data):
    """Return the SHA-1 digest of data (20 bytes).

    NOTE: this shadows the builtin hash(); the name is kept because the
    rest of this module calls it. Uses hashlib instead of the deprecated
    'sha' module; the digest is identical.
    """
    return hashlib.sha1(data).digest()
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
The goal of the encoder is to turn the original file into a series of
|
|
|
|
'shares'. Each share is going to a 'shareholder' (nominally each shareholder
|
|
|
|
is a different host, but for small meshes there may be overlap). The number
|
|
|
|
of shares is chosen to hit our reliability goals (more shares on more
|
|
|
|
machines means more reliability), and is limited by overhead (proportional to
|
|
|
|
numshares or log(numshares)) and the encoding technology in use (Reed-Solomon
|
|
|
|
only permits 256 shares total). It is also constrained by the amount of data
|
|
|
|
we want to send to each host. For estimating purposes, think of 100 shares
|
|
|
|
out of which we need 25 to reconstruct the file.
|
|
|
|
|
|
|
|
The encoder starts by cutting the original file into segments. All segments
|
|
|
|
except the last are of equal size. The segment size is chosen to constrain
|
|
|
|
the memory footprint (which will probably vary between 1x and 4x segment
|
|
|
|
size) and to constrain the overhead (which will be proportional to either the
|
|
|
|
number of segments or log(number of segments)).
|
|
|
|
|
|
|
|
|
|
|
|
Each segment (A,B,C) is read into memory, encrypted, and encoded into
|
|
|
|
subshares. The 'share' (say, share #1) that makes it out to a host is a
|
|
|
|
collection of these subshares (subshare A1, B1, C1), plus some hash-tree
|
|
|
|
information necessary to validate the data upon retrieval. Only one segment
|
|
|
|
is handled at a time: all subshares for segment A are delivered before any
|
|
|
|
work is begun on segment B.
|
|
|
|
|
|
|
|
As subshares are created, we retain the hash of each one. The list of
|
|
|
|
subshare hashes for a single share (say, hash(A1), hash(B1), hash(C1)) is
|
|
|
|
used to form the base of a Merkle hash tree for that share (hashtrees[1]).
|
|
|
|
This hash tree has one terminal leaf per subshare. The complete subshare hash
|
|
|
|
tree is sent to the shareholder after all the data has been sent. At
|
|
|
|
retrieval time, the decoder will ask for specific pieces of this tree before
|
|
|
|
asking for subshares, whichever it needs to validate those subshares.
|
|
|
|
|
2006-12-29 19:40:10 +00:00
|
|
|
(Note: we don't really need to generate this whole subshare hash tree
|
2006-12-14 03:32:35 +00:00
|
|
|
ourselves. It would be sufficient to have the shareholder generate it and
|
|
|
|
just tell us the root. This gives us an extra level of validation on the
|
2006-12-29 19:40:10 +00:00
|
|
|
transfer, though, and it is relatively cheap to compute.)
|
2006-12-14 03:32:35 +00:00
|
|
|
|
|
|
|
Each of these subshare hash trees has a root hash. The collection of these
|
|
|
|
root hashes for all shares forms the 'share hash tree', which
|
|
|
|
has one terminal leaf per share. After sending the subshares and the complete
|
|
|
|
subshare hash tree to each shareholder, we send them the portion of the share
|
|
|
|
hash tree that is necessary to validate their share. The root of the share
|
|
|
|
hash tree is put into the URI.
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
2007-01-05 00:58:14 +00:00
|
|
|
def pad(s, l, c='\x00'):
    """Append copies of c to s until its length is a multiple of l bytes.

    @param s the original string
    @param l the alignment: the returned string's length is a multiple of l
    @param c the single-character pad (defaults to NUL)
    """
    needed = mathutil.pad_size(len(s), l)
    return s + (c * needed)
|
2006-12-14 03:32:35 +00:00
|
|
|
|
2007-01-05 06:51:35 +00:00
|
|
|
# Binary (power-of-two) size units.
KiB = 2 ** 10
MiB = 2 ** 20
GiB = 2 ** 30
TiB = 2 ** 40
PiB = 2 ** 50
|
|
|
|
|
2006-12-14 03:32:35 +00:00
|
|
|
class Encoder(object):
    """Turn an input file into encrypted, erasure-coded shares.

    Usage: call setup(infile) to measure the file and choose parameters,
    then start(), which returns a Deferred that fires with the root hash
    of the share hash tree (the value that goes into the URI).

    NOTE(review): the caller is expected to populate self.landlords
    (share_num -> remote reference) before start(); it is not set up in
    this file -- confirm against callers.
    """

    def setup(self, infile):
        """Measure the input file and choose encoding parameters.

        @param infile: a seekable file-like object holding the plaintext
        """
        self.infile = infile
        # seek to the end to learn the file size, then rewind
        infile.seek(0, 2)
        self.file_size = infile.tell()
        infile.seek(0, 0)

        # 100 shares total, any 25 of which reconstruct the file
        self.num_shares = 100
        self.required_shares = 25

        # segment size bounds the memory footprint; a small file is one
        # single segment
        self.segment_size = min(2*MiB, self.file_size)
        self.num_segments = mathutil.div_ceil(self.file_size,
                                              self.segment_size)

    def setup_encoder(self):
        """Create the per-segment erasure-coding encoder."""
        self.encoder = ReplicatingEncoder()
        self.encoder.set_params(self.segment_size, self.required_shares,
                                self.num_shares)
        self.share_size = self.encoder.get_share_size()

    def get_reservation_size(self):
        """Return the number of bytes to reserve with each shareholder.

        NOTE(review): self.compute_overhead is not defined in this file --
        confirm it is provided elsewhere.
        """
        self.num_shares = 100
        # NOTE(review): '/' is integer division under python2 -- presumably
        # intentional for a byte count
        self.share_size = self.file_size / self.required_shares
        overhead = self.compute_overhead()
        return self.share_size + overhead

    def setup_encryption(self):
        """Initialize the AES-CTR cryptor and per-share bookkeeping."""
        # TODO(review): fixed all-zeros key and counter -- clearly a
        # placeholder, must not ship
        self.key = "\x00"*16
        self.cryptor = AES.new(key=self.key, mode=AES.MODE_CTR,
                               counterstart="\x00"*16)
        self.segment_num = 0
        # subshare_hashes[i] is a list that will be accumulated and then
        # sent to landlord[i]. This list contains a hash of each
        # segment_share that we sent to that landlord.
        self.subshare_hashes = [[] for x in range(self.num_shares)]
        # share_root_hashes[i] becomes the root of share i's subshare
        # hash tree once that tree has been built and sent
        self.share_root_hashes = [None] * self.num_shares

    def start(self):
        """Run the whole encode-and-upload pipeline.

        @return: a Deferred that fires with the root hash of the share
                 hash tree.
        """
        self.setup_encryption()
        self.setup_encoder()
        d = defer.succeed(None)
        for i in range(self.num_segments):
            # BUGFIX: bind i as a default argument. The original
            # 'lambda res: self.do_segment(i)' late-binds i, so every
            # callback would have received the final loop value.
            d.addCallback(lambda res, segnum=i: self.do_segment(segnum))
        d.addCallback(lambda res: self.send_all_subshare_hash_trees())
        d.addCallback(lambda res: self.send_all_share_hash_trees())
        d.addCallback(lambda res: self.close_all_shareholders())
        d.addCallback(lambda res: self.done())
        return d

    def do_segment(self, segnum):
        """Read, encrypt, and encode one segment, then ship its subshares."""
        segment_plaintext = self.infile.read(self.segment_size)
        segment_crypttext = self.cryptor.encrypt(segment_plaintext)
        # drop the plaintext as early as possible to limit the footprint
        del segment_plaintext
        d = self.encoder.encode(segment_crypttext)
        d.addCallback(self._encoded_segment)
        return d

    def _encoded_segment(self, subshare_tuples):
        """Deliver each (share_num, subshare) of the current segment."""
        dl = []
        for share_num,subshare in subshare_tuples:
            d = self.send_subshare(share_num, self.segment_num, subshare)
            dl.append(d)
            # remember the subshare hash: it becomes a leaf of this
            # share's subshare hash tree
            self.subshare_hashes[share_num].append(hash(subshare))
        self.segment_num += 1
        return defer.DeferredList(dl)

    def send_subshare(self, share_num, segment_num, subshare):
        """Send one subshare to the shareholder holding share_num."""
        #if False:
        #    offset = hash_size + segment_num * segment_size
        #    return self.send(share_num, "write", subshare, offset)
        return self.send(share_num, "put_subshare", segment_num, subshare)

    def send(self, share_num, methname, *args, **kwargs):
        """Invoke a remote method on the landlord for share_num."""
        ll = self.landlords[share_num]
        return ll.callRemote(methname, *args, **kwargs)

    def send_all_subshare_hash_trees(self):
        """Send each shareholder the hash tree over its own subshares."""
        dl = []
        for share_num,hashes in enumerate(self.subshare_hashes):
            # hashes is a list of the hashes of all subshares that were
            # sent to shareholder[share_num].
            dl.append(self.send_one_subshare_hash_tree(share_num, hashes))
        return defer.DeferredList(dl)

    def send_one_subshare_hash_tree(self, share_num, subshare_hashes):
        """Build one share's Merkle tree over its subshare hashes, send it."""
        t = HashTree(subshare_hashes)
        all_hashes = list(t)
        # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
        # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
        # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
        self.share_root_hashes[share_num] = t[0]
        if False:
            block = "".join(all_hashes)
            return self.send(share_num, "write", block, offset=0)
        # NOTE(review): RIStorageBucketWriter declares put_segment_hashes,
        # not put_subshare_hashes -- confirm which name is correct
        return self.send(share_num, "put_subshare_hashes", all_hashes)

    def send_all_share_hash_trees(self):
        """Build the share hash tree; send each holder its needed chain."""
        dl = []
        for h in self.share_root_hashes:
            assert h  # every share's subshare tree must already be built
        # create the share hash tree
        t = HashTree(self.share_root_hashes)
        # the root of this hash tree goes into our URI
        self.root_hash = t[0]
        # the HashTree is given a list of leaves: 0,1,2,3..n .
        # These become nodes A+0,A+1,A+2.. of the tree, where A=n-1
        # (tree width is loop-invariant, so compute it once)
        tree_width = roundup_pow2(self.num_shares)
        # now send just the necessary pieces out to each shareholder
        for i in range(self.num_shares):
            base_index = i + tree_width - 1
            needed_hash_indices = t.needed_for(base_index)
            hashes = [(hi, t[hi]) for hi in needed_hash_indices]
            dl.append(self.send_one_share_hash_tree(i, hashes))
        return defer.DeferredList(dl)

    def send_one_share_hash_tree(self, share_num, needed_hashes):
        """Send one shareholder the (index, hash) chain for its share."""
        return self.send(share_num, "put_share_hashes", needed_hashes)

    def close_all_shareholders(self):
        """Tell every shareholder that its share is complete."""
        dl = []
        for share_num in range(self.num_shares):
            dl.append(self.send(share_num, "close"))
        return defer.DeferredList(dl)

    def done(self):
        """Return the URI component: the root of the share hash tree."""
        return self.root_hash
|
|
|
|
|
|
|
|
|
|
|
|
from foolscap import RemoteInterface
from foolscap.schema import ListOf, TupleOf, Nothing

# Schema constraint meaning "this remote method returns nothing";
# shared by the RemoteInterface declarations below.
_None = Nothing()
|
|
|
|
|
|
|
|
|
|
|
|
class RIStorageBucketWriter(RemoteInterface):
    """Remote interface offered by a shareholder receiving one share.

    These are foolscap schema declarations: the argument "values" are
    type constraints, not defaults.
    """
    def put_subshare(segment_number=int, subshare=str):
        # store the subshare for one segment of this share
        return _None
    def put_segment_hashes(all_hashes=ListOf(str)):
        # NOTE(review): Encoder.send_one_subshare_hash_tree calls
        # "put_subshare_hashes", which does not match this name --
        # confirm which side is correct.
        return _None
    def put_share_hashes(needed_hashes=ListOf(TupleOf(int,str))):
        # store the (tree-index, hash) pairs needed to validate this share
        return _None
    #def write(data=str, offset=int):
    #    return _None
|
|
|
|
class RIStorageBucketReader(RemoteInterface):
    """Remote interface for retrieving the pieces of one stored share.

    Mirror image of RIStorageBucketWriter; foolscap schema declarations.
    """
    def get_share_hashes():
        # the (tree-index, hash) pairs of the share hash tree for this share
        return ListOf(TupleOf(int,str))
    def get_segment_hashes(which=ListOf(int)):
        # the requested nodes of this share's subshare hash tree
        return ListOf(str)
    def get_subshare(segment_number=int):
        # the subshare this shareholder holds for the given segment
        return str
    #def read(size=int, offset=int):
    #    return str
|
2006-12-14 03:35:12 +00:00
|
|
|
|
|
|
|
"figleaf doesn't like the last line of the file to be a comment"
|