from zope.interface import implements
from twisted.python import failure, log
from twisted.internet import defer
from twisted.application import service
from foolscap import Referenceable

from allmydata.util import idlib, bencode, mathutil
from allmydata.util.idlib import peerid_to_short_string as shortid
from allmydata.util.deferredutil import DeferredListShouldSucceed
from allmydata import codec
from allmydata.uri import pack_uri
from allmydata.interfaces import IUploadable, IUploader

from cStringIO import StringIO
import sha

class NotEnoughPeersError(Exception):
    pass

class HaveAllPeersError(Exception):
    # we use this to jump out of the loop
    pass

# this wants to live in storage, not here
class TooFullError(Exception):
    pass

class FileUploader:
    debug = False
    ENCODERCLASS = codec.CRSEncoder

    def __init__(self, peer):
        self._peer = peer

    def set_params(self, min_shares, target_goodness, max_shares):
        self.min_shares = min_shares
        self.target_goodness = target_goodness
        self.max_shares = max_shares

    def set_filehandle(self, filehandle):
        self._filehandle = filehandle
        filehandle.seek(0, 2)  # seek to end-of-file to learn the size
        self._size = filehandle.tell()
        filehandle.seek(0)

    def set_verifierid(self, vid):
        assert isinstance(vid, str)
        assert len(vid) == 20  # SHA-1 digests are 20 bytes
        self._verifierid = vid

    def start(self):
        """Start uploading the file.

        The source of the data to be uploaded must have been set before this
        point by calling set_filehandle().

        This method returns a Deferred that will fire with the URI (a
        string)."""

        log.msg("starting upload [%s]" % (idlib.b2a(self._verifierid),))
        if self.debug:
            print "starting upload"
        assert self.min_shares
        assert self.target_goodness

        # create the encoder, so we can know how large the shares will be
        total_shares = self.max_shares
        needed_shares = self.min_shares
        self._encoder = self.ENCODERCLASS()
        self._codec_name = self._encoder.get_encoder_type()
        self._needed_shares = needed_shares
        # pad the size up to the next multiple of needed_shares, so the
        # data can be split into equal-sized chunks
        paddedsize = self._size + mathutil.pad_size(self._size, needed_shares)
        self._encoder.set_params(paddedsize, needed_shares, total_shares)
        self._share_size = self._encoder.get_share_size()
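
        # A worked illustration of the sizing arithmetic above (values are
        # hypothetical, and this assumes mathutil.pad_size(n, k) returns how
        # many bytes are needed to round n up to a multiple of k): for a
        # 10-byte file with needed_shares=3, pad_size(10, 3) == 2, so
        # paddedsize == 12 and the encoder works on three 4-byte chunks.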

        # first step: who should we upload to?

        # We will talk to at most max_peers (which can be None to mean no
        # limit). Maybe limit max_peers to 2*len(self.shares), to reduce
        # memory footprint. For now, make it unlimited.
        max_peers = None

        self.permuted = self._peer.permute_peerids(self._verifierid, max_peers)
        self.peers_who_said_yes = []
        self.peers_who_said_no = []
        self.peers_who_had_errors = []

        self._total_peers = len(self.permuted)
        for p in self.permuted:
            assert isinstance(p, str)
        # we will shrink self.permuted as we give up on peers

        d = defer.maybeDeferred(self._find_peers)
        d.addCallback(self._got_enough_peers)
        d.addCallback(self._compute_uri)
        return d

    def _compute_uri(self, params):
        return pack_uri(self._codec_name, params, self._verifierid)

    def _build_not_enough_peers_error(self):
        yes = ",".join([shortid(p) for p in self.peers_who_said_yes])
        no = ",".join([shortid(p) for p in self.peers_who_said_no])
        err = ",".join([shortid(p) for p in self.peers_who_had_errors])
        msg = ("%s goodness, want %s, have %d "
               "landlords, %d total peers, "
               "peers:yes=%s;no=%s;err=%s" %
               (self.goodness_points, self.target_goodness,
                len(self.landlords), self._total_peers,
                yes, no, err))
        return msg

    def _find_peers(self):
        # this returns a Deferred which fires (with a meaningless value) when
        # enough peers are found, or errbacks with a NotEnoughPeersError if
        # not.
        self.peer_index = 0
        self.goodness_points = 0
        self.landlords = [] # list of (peerid, bucket_num, remotebucket)
        return self._check_next_peer()

    def _check_next_peer(self):
        if self.debug:
            log.msg("FileUploader._check_next_peer: %d permuted, %d goodness"
                    " (want %d), have %d landlords, %d total peers" %
                    (len(self.permuted), self.goodness_points,
                     self.target_goodness, len(self.landlords),
                     self._total_peers))
        if (self.goodness_points >= self.target_goodness and
            len(self.landlords) >= self.min_shares):
            if self.debug: print " we're done!"
            return "done"
        if not self.permuted:
            # we've run out of peers to check without finding enough, which
            # means we won't be able to upload this file. Bummer.
            msg = self._build_not_enough_peers_error()
            log.msg("NotEnoughPeersError: %s" % msg)
            raise NotEnoughPeersError(msg)

        # otherwise we use self.peer_index to rotate through all the usable
        # peers. It gets incremented elsewhere, but wrapped here.
        if self.peer_index >= len(self.permuted):
            self.peer_index = 0

        peerid = self.permuted[self.peer_index]

        d = self._check_peer(peerid)
        d.addCallback(lambda res: self._check_next_peer())
        return d

    def _check_peer(self, peerid):
        # contact a single peer, and ask them to hold a share. If they say
        # yes, we update self.landlords and self.goodness_points, and
        # increment self.peer_index. If they say no, or are uncontactable, we
        # remove them from self.permuted. This returns a Deferred which never
        # errbacks.

        bucket_num = len(self.landlords)
        d = self._peer.get_remote_service(peerid, "storageserver")

        def _got_peer(service):
            if self.debug: print "asking %s" % shortid(peerid)
            d2 = service.callRemote("allocate_bucket",
                                    verifierid=self._verifierid,
                                    bucket_num=bucket_num,
                                    size=self._share_size,
                                    leaser=self._peer.nodeid,
                                    canary=Referenceable())
            return d2
        d.addCallback(_got_peer)

        def _allocate_response(bucket):
            if self.debug:
                print " peerid %s will grant us a lease" % shortid(peerid)
            self.peers_who_said_yes.append(peerid)
            self.landlords.append( (peerid, bucket_num, bucket) )
            self.goodness_points += 1
            self.peer_index += 1

        d.addCallback(_allocate_response)

        def _err(f):
            if self.debug: print "err from peer %s:" % idlib.b2a(peerid)
            assert isinstance(f, failure.Failure)
            if f.check(TooFullError):
                if self.debug: print " too full"
                self.peers_who_said_no.append(peerid)
            elif f.check(IndexError):
                # an IndexError indicates we have no connection to this peer
                if self.debug: print " no connection"
                self.peers_who_had_errors.append(peerid)
            else:
                if self.debug: print " other error:", f
                self.peers_who_had_errors.append(peerid)
                log.msg("FileUploader._check_peer(%s): err" % shortid(peerid))
                log.msg(f)
            self.permuted.remove(peerid) # this peer was unusable
            return None
        d.addErrback(_err)

        return d

    def _got_enough_peers(self, res):
        landlords = self.landlords
        if self.debug:
            log.msg("FileUploader._got_enough_peers")
            log.msg(" %d landlords" % len(landlords))
            if len(landlords) < 20:
                log.msg(" peerids: %s" % " ".join([idlib.b2a(l[0])
                                                   for l in landlords]))
                log.msg(" buckets: %s" % " ".join([str(l[1])
                                                   for l in landlords]))
        # assign shares to landlords
        self.sharemap = {}
        for peerid, bucket_num, bucket in landlords:
            self.sharemap[bucket_num] = bucket
        # the sharemap should have exactly len(landlords) shares, with
        # no holes
        assert sorted(self.sharemap.keys()) == range(len(landlords))
        # encode all the data at once: this class does not use segmentation
        data = self._filehandle.read()

        # NOTE: this hand-rolled chunking is about to go away anyway.
        chunksize = mathutil.div_ceil(len(data), self._needed_shares)
        numchunks = mathutil.div_ceil(len(data), chunksize)
        l = [ data[i:i+chunksize] for i in range(0, len(data), chunksize) ]
        # pad the last chunk with NUL bytes so all chunks are the same size
        if len(l[-1]) != len(l[0]):
            l[-1] = l[-1] + ('\x00'*(len(l[0])-len(l[-1])))
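
        # A worked illustration of the chunking above (hypothetical values):
        # with len(data) == 10 and self._needed_shares == 3,
        # div_ceil(10, 3) == 4, so l holds chunks of 4, 4, and 2 bytes, and
        # the 2-byte tail is padded with two NUL bytes to match the others.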
        d = self._encoder.encode(l, self.sharemap.keys())
        d.addCallback(self._send_all_shares)
        d.addCallback(lambda res: self._encoder.get_serialized_params())
        return d

    def _send_one_share(self, bucket, sharedata, metadata):
        # the remote bucket protocol: write the share data, attach the
        # metadata, then close the bucket
        d = bucket.callRemote("write", sharedata)
        d.addCallback(lambda res:
                      bucket.callRemote("set_metadata", metadata))
        d.addCallback(lambda res:
                      bucket.callRemote("close"))
        return d

    def _send_all_shares(self, (shares, shareids)):
        dl = []
        for (shareid, share) in zip(shareids, shares):
            if self.debug:
                log.msg(" writing share %d" % shareid)
            metadata = bencode.bencode(shareid)
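            # (assuming the standard bencode encoding, an integer is wrapped
            # as "i<digits>e"; e.g. shareid 3 becomes the string "i3e")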
            assert len(share) == self._share_size
            assert isinstance(share, str)
            bucket = self.sharemap[shareid]
            d = self._send_one_share(bucket, share, metadata)
            dl.append(d)
        return DeferredListShouldSucceed(dl)


def netstring(s):
    return "%d:%s," % (len(s), s)
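
# For example, netstring("abc") == "3:abc,"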


# adapters that present a filename, an in-memory byte string, or an open
# filehandle as IUploadable

class FileName:
    implements(IUploadable)
    def __init__(self, filename):
        self._filename = filename
    def get_filehandle(self):
        return open(self._filename, "rb")
    def close_filehandle(self, f):
        f.close()

class Data:
    implements(IUploadable)
    def __init__(self, data):
        self._data = data
    def get_filehandle(self):
        return StringIO(self._data)
    def close_filehandle(self, f):
        pass

class FileHandle:
    implements(IUploadable)
    def __init__(self, filehandle):
        self._filehandle = filehandle
    def get_filehandle(self):
        return self._filehandle
    def close_filehandle(self, f):
        # the originator of the filehandle reserves the right to close it
        pass


class Uploader(service.MultiService):
    """I am a service that allows file uploading.
    """
    implements(IUploader)
    name = "uploader"
    uploader_class = FileUploader
    debug = False

    def _compute_verifierid(self, f):
        hasher = sha.new(netstring("allmydata_v1_verifierid"))
        f.seek(0)
        data = f.read()
        hasher.update(data)
        f.seek(0)
        # note: this is only of the plaintext data, no encryption yet
        return hasher.digest()
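
    # A worked note on the construction above: len("allmydata_v1_verifierid")
    # is 23, so the hash input is the string "23:allmydata_v1_verifierid,"
    # followed by the file's plaintext, and the verifierid is the resulting
    # 20-byte SHA-1 digest.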

    def upload(self, f):
        # this returns a Deferred that fires with the URI
        assert self.parent
        assert self.running
        f = IUploadable(f)
        fh = f.get_filehandle()
        u = self.uploader_class(self.parent)
        if self.debug:
            u.debug = True
        u.set_filehandle(fh)
        # push two shares, require that we get two back. TODO: this is
        # temporary, of course.
        u.set_params(2, 2, 4) # min_shares=2, target_goodness=2, max_shares=4
        u.set_verifierid(self._compute_verifierid(fh))
        d = u.start()
        def _done(res):
            f.close_filehandle(fh)
            return res
        d.addBoth(_done)
        return d

    # utility functions

    def upload_data(self, data):
        return self.upload(Data(data))
    def upload_filename(self, filename):
        return self.upload(FileName(filename))
    def upload_filehandle(self, filehandle):
        return self.upload(FileHandle(filehandle))
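

# A minimal usage sketch (hypothetical, not part of this module): assuming an
# Uploader instance has been attached to a running client node, as upload()'s
# asserts require, a caller might do:
#
#   uploader = client.getServiceNamed(Uploader.name)
#   d = uploader.upload_data("some bytes to store")
#   d.addCallback(lambda uri: log.msg("stored as %s" % uri))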