# tahoe-lafs/src/allmydata/upload.py

from zope.interface import implements
from twisted.python import failure, log
from twisted.internet import defer
from twisted.application import service
from foolscap import Referenceable
from allmydata.util import idlib, bencode, mathutil
from allmydata.util.idlib import peerid_to_short_string as shortid
from allmydata.util.deferredutil import DeferredListShouldSucceed
from allmydata import codec
from allmydata.uri import pack_uri
from allmydata.interfaces import IUploadable, IUploader
from cStringIO import StringIO
import sha

class NotEnoughPeersError(Exception):
    pass

class HaveAllPeersError(Exception):
    # we use this to jump out of the loop
    pass

# this wants to live in storage, not here
class TooFullError(Exception):
    pass

class FileUploader:
    debug = False
    ENCODERCLASS = codec.CRSEncoder

    def __init__(self, peer):
        self._peer = peer

    def set_params(self, min_shares, target_goodness, max_shares):
        self.min_shares = min_shares
        self.target_goodness = target_goodness
        self.max_shares = max_shares
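    # A worked example (a sketch): Uploader.upload() below calls
    # set_params(2, 2, 4), i.e. encode into max_shares=4 total shares, of
    # which any min_shares=2 suffice to reconstruct the file, and keep
    # asking peers until target_goodness=2 of them have accepted a share.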

    def set_filehandle(self, filehandle):
        self._filehandle = filehandle
        filehandle.seek(0, 2) # seek to EOF to learn the file's size
        self._size = filehandle.tell()
        filehandle.seek(0)

    def set_verifierid(self, vid):
        assert isinstance(vid, str)
        assert len(vid) == 20 # SHA-1 digests are 20 bytes
        self._verifierid = vid

    def start(self):
        """Start uploading the file.

        The source of the data to be uploaded must have been set before
        this point by calling set_filehandle().

        This method returns a Deferred that will fire with the URI (a
        string)."""
        log.msg("starting upload [%s]" % (idlib.b2a(self._verifierid),))
        if self.debug:
            print "starting upload"
        assert self.min_shares
        assert self.target_goodness

        # create the encoder, so we know how large the shares will be
        total_shares = self.max_shares
        needed_shares = self.min_shares
        self._encoder = self.ENCODERCLASS()
        self._codec_name = self._encoder.get_encoder_type()
        self._needed_shares = needed_shares
        # pad the data so it divides evenly into needed_shares pieces
        paddedsize = self._size + mathutil.pad_size(self._size, needed_shares)
        self._encoder.set_params(paddedsize, needed_shares, total_shares)
        self._share_size = self._encoder.get_share_size()
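        # A small worked example (a sketch, assuming pad_size() returns the
        # number of bytes needed to reach a multiple of needed_shares): a
        # 10-byte file with needed_shares=3 gives pad_size(10, 3) == 2, so
        # paddedsize == 12 and each share holds 12/3 == 4 bytes.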

        # first step: who should we upload to?

        # We will talk to at most max_peers (which can be None to mean no
        # limit). Maybe limit max_peers to 2*len(self.shares), to reduce
        # memory footprint. For now, make it unlimited.
        max_peers = None

        self.permuted = self._peer.permute_peerids(self._verifierid, max_peers)
        self.peers_who_said_yes = []
        self.peers_who_said_no = []
        self.peers_who_had_errors = []
        self._total_peers = len(self.permuted)
        for p in self.permuted:
            assert isinstance(p, str)

        # we will shrink self.permuted as we give up on peers
        d = defer.maybeDeferred(self._find_peers)
        d.addCallback(self._got_enough_peers)
        d.addCallback(self._compute_uri)
        return d

    def _compute_uri(self, params):
        return pack_uri(self._codec_name, params, self._verifierid)
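    # (pack_uri combines the codec name, the codec's serialized parameters,
    # and the verifierid into the URI string that start() ultimately
    # returns; see allmydata.uri.pack_uri.)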

    def _build_not_enough_peers_error(self):
        yes = ",".join([shortid(p) for p in self.peers_who_said_yes])
        no = ",".join([shortid(p) for p in self.peers_who_said_no])
        err = ",".join([shortid(p) for p in self.peers_who_had_errors])
        msg = ("%s goodness, want %s, have %d landlords, %d total peers, "
               "peers:yes=%s;no=%s;err=%s" %
               (self.goodness_points, self.target_goodness,
                len(self.landlords), self._total_peers,
                yes, no, err))
        return msg

    def _find_peers(self):
        # this returns a Deferred which fires (with a meaningless value)
        # when enough peers are found, or errbacks with a
        # NotEnoughPeersError if not.
        self.peer_index = 0
        self.goodness_points = 0
        self.landlords = [] # list of (peerid, bucket_num, remotebucket)
        return self._check_next_peer()

    def _check_next_peer(self):
        if self.debug:
            log.msg("FileUploader._check_next_peer: %d permuted, %d goodness"
                    " (want %d), have %d landlords, %d total peers" %
                    (len(self.permuted), self.goodness_points,
                     self.target_goodness, len(self.landlords),
                     self._total_peers))
        if (self.goodness_points >= self.target_goodness and
            len(self.landlords) >= self.min_shares):
            if self.debug: print " we're done!"
            return "done"
        if not self.permuted:
            # we've run out of peers to check without finding enough, which
            # means we won't be able to upload this file. Bummer.
            msg = self._build_not_enough_peers_error()
            log.msg("NotEnoughPeersError: %s" % msg)
            raise NotEnoughPeersError(msg)

        # otherwise we use self.peer_index to rotate through all the usable
        # peers. It gets incremented elsewhere, but wrapped here.
        if self.peer_index >= len(self.permuted):
            self.peer_index = 0
        peerid = self.permuted[self.peer_index]
        d = self._check_peer(peerid)
        d.addCallback(lambda res: self._check_next_peer())
        return d

    def _check_peer(self, peerid):
        # contact a single peer, and ask them to hold a share. If they say
        # yes, we update self.landlords and self.goodness_points, and
        # increment self.peer_index. If they say no, or are uncontactable,
        # we remove them from self.permuted. This returns a Deferred which
        # never errbacks.
        bucket_num = len(self.landlords)
        d = self._peer.get_remote_service(peerid, "storageserver")
        def _got_peer(service):
            if self.debug: print "asking %s" % shortid(peerid)
            d2 = service.callRemote("allocate_bucket",
                                    verifierid=self._verifierid,
                                    bucket_num=bucket_num,
                                    size=self._share_size,
                                    leaser=self._peer.nodeid,
                                    canary=Referenceable())
            return d2
        d.addCallback(_got_peer)
        def _allocate_response(bucket):
            if self.debug:
                print " peerid %s will grant us a lease" % shortid(peerid)
            self.peers_who_said_yes.append(peerid)
            self.landlords.append( (peerid, bucket_num, bucket) )
            self.goodness_points += 1
            self.peer_index += 1
        d.addCallback(_allocate_response)
        def _err(f):
            if self.debug: print "err from peer %s:" % idlib.b2a(peerid)
            assert isinstance(f, failure.Failure)
            if f.check(TooFullError):
                if self.debug: print " too full"
                self.peers_who_said_no.append(peerid)
            elif f.check(IndexError):
                if self.debug: print " no connection"
                self.peers_who_had_errors.append(peerid)
            else:
                if self.debug: print " other error:", f
                self.peers_who_had_errors.append(peerid)
                log.msg("FileUploader._check_peer(%s): err" % shortid(peerid))
                log.msg(f)
            self.permuted.remove(peerid) # this peer was unusable
            return None
        d.addErrback(_err)
        return d

    def _got_enough_peers(self, res):
        landlords = self.landlords
        if self.debug:
            log.msg("FileUploader._got_enough_peers")
            log.msg(" %d landlords" % len(landlords))
            if len(landlords) < 20:
                log.msg(" peerids: %s" % " ".join([idlib.b2a(l[0])
                                                   for l in landlords]))
                log.msg(" buckets: %s" % " ".join([str(l[1])
                                                   for l in landlords]))
        # assign shares to landlords
        self.sharemap = {}
        for peerid, bucket_num, bucket in landlords:
            self.sharemap[bucket_num] = bucket
        # the sharemap should have exactly len(landlords) shares, with
        # no holes
        assert sorted(self.sharemap.keys()) == range(len(landlords))

        # encode all the data at once: this class does not use segmentation
        data = self._filehandle.read()
        # NOTE: this chunking code is about to go away anyway
        chunksize = mathutil.div_ceil(len(data), self._needed_shares)
        numchunks = mathutil.div_ceil(len(data), chunksize)
        l = [ data[i:i+chunksize] for i in range(0, len(data), chunksize) ]
        # pad the last chunk out to the same length as the others
        if len(l[-1]) != len(l[0]):
            l[-1] = l[-1] + ('\x00'*(len(l[0])-len(l[-1])))
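        # A worked example of this chunking (a sketch): with
        # data = "abcdefghij" (10 bytes) and needed_shares = 3,
        # chunksize == div_ceil(10, 3) == 4, so l == ["abcd", "efgh", "ij"],
        # and the last chunk is padded with NULs to "ij\x00\x00" before
        # being handed to the encoder.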
        d = self._encoder.encode(l, self.sharemap.keys())
        d.addCallback(self._send_all_shares)
        d.addCallback(lambda res: self._encoder.get_serialized_params())
        return d

    def _send_one_share(self, bucket, sharedata, metadata):
        d = bucket.callRemote("write", sharedata)
        d.addCallback(lambda res:
                      bucket.callRemote("set_metadata", metadata))
        d.addCallback(lambda res:
                      bucket.callRemote("close"))
        return d

    def _send_all_shares(self, (shares, shareids)):
        dl = []
        for (shareid, share) in zip(shareids, shares):
            if self.debug:
                log.msg(" writing share %d" % shareid)
            metadata = bencode.bencode(shareid)
            assert len(share) == self._share_size
            assert isinstance(share, str)
            bucket = self.sharemap[shareid]
            d = self._send_one_share(bucket, share, metadata)
            dl.append(d)
        return DeferredListShouldSucceed(dl)

def netstring(s):
    return "%d:%s," % (len(s), s)

class FileName:
    implements(IUploadable)
    def __init__(self, filename):
        self._filename = filename
    def get_filehandle(self):
        return open(self._filename, "rb")
    def close_filehandle(self, f):
        f.close()

class Data:
    implements(IUploadable)
    def __init__(self, data):
        self._data = data
    def get_filehandle(self):
        return StringIO(self._data)
    def close_filehandle(self, f):
        pass

class FileHandle:
    implements(IUploadable)
    def __init__(self, filehandle):
        self._filehandle = filehandle
    def get_filehandle(self):
        return self._filehandle
    def close_filehandle(self, f):
        # the originator of the filehandle reserves the right to close it
        pass

class Uploader(service.MultiService):
    """I am a service that allows file uploading."""
    implements(IUploader)
    name = "uploader"
    uploader_class = FileUploader
    debug = False

    def _compute_verifierid(self, f):
        hasher = sha.new(netstring("allmydata_v1_verifierid"))
        f.seek(0)
        data = f.read()
        hasher.update(data)
        f.seek(0)
        # note: this hash covers only the plaintext; no encryption yet
        return hasher.digest()
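    # Equivalently (a sketch): the verifierid of plaintext p is
    #   sha.new("23:allmydata_v1_verifierid," + p).digest()
    # i.e. SHA-1 over the netstring-framed tag followed by the plaintext.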

    def upload(self, f):
        # this returns a Deferred that fires with the URI
        assert self.parent
        assert self.running
        f = IUploadable(f)
        fh = f.get_filehandle()
        u = self.uploader_class(self.parent)
        if self.debug:
            u.debug = True
        u.set_filehandle(fh)
        # push two shares, require that we get two back. TODO: this is
        # temporary, of course.
        u.set_params(2, 2, 4)
        u.set_verifierid(self._compute_verifierid(fh))
        d = u.start()
        def _done(res):
            f.close_filehandle(fh)
            return res
        d.addBoth(_done)
        return d

    # utility functions
    def upload_data(self, data):
        return self.upload(Data(data))
    def upload_filename(self, filename):
        return self.upload(FileName(filename))
    def upload_filehandle(self, filehandle):
        return self.upload(FileHandle(filehandle))
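
# A minimal usage sketch (hypothetical; assumes a running node with an
# Uploader attached as a child service and connected storage peers):
#
#   uploader = node.getServiceNamed("uploader")
#   d = uploader.upload_data("some bytes to store")
#   d.addCallback(lambda uri: log.msg("stored as %s" % uri))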