tahoe-lafs/src/allmydata/upload.py

282 lines
9.9 KiB
Python
Raw Normal View History

from zope.interface import implements
2007-03-30 03:19:52 +00:00
from twisted.python import log
2006-12-01 09:54:28 +00:00
from twisted.internet import defer
from twisted.application import service
from foolscap import Referenceable
2007-03-30 03:19:52 +00:00
from allmydata.util import idlib, mathutil
from allmydata import codec
2007-01-17 04:29:59 +00:00
from allmydata.uri import pack_uri
from allmydata.interfaces import IUploadable, IUploader
2006-12-01 09:54:28 +00:00
2006-12-03 03:31:43 +00:00
from cStringIO import StringIO
2007-03-30 03:19:52 +00:00
import collections, random, sha
2006-12-03 03:31:43 +00:00
2006-12-01 09:54:28 +00:00
class NotEnoughPeersError(Exception):
pass
class HaveAllPeersError(Exception):
# we use this to jump out of the loop
pass
# this wants to live in storage, not here
class TooFullError(Exception):
pass
2007-03-30 03:19:52 +00:00
class PeerTracker:
def __init__(self, peerid, connection, sharesize, blocksize, verifierid):
self.peerid = peerid
self.connection = connection
self.buckets = {} # k: shareid, v: IRemoteBucketWriter
self.sharesize = sharesize
self.blocksize = blocksize
self.verifierid = verifierid
def query(self, sharenums):
d = self.connection.callRemote("allocate_buckets", self._verifierid,
sharenums, self.sharesize,
self.blocksize, canary=Referenceable())
d.addCallback(self._got_reply)
return d
def _got_reply(self, (alreadygot, buckets)):
self.buckets.update(buckets)
return (alreadygot, set(buckets.keys()))
class FileUploader:
2006-12-01 09:54:28 +00:00
debug = False
2007-03-28 07:05:16 +00:00
ENCODERCLASS = codec.CRSEncoder
2006-12-01 09:54:28 +00:00
2007-03-30 03:19:52 +00:00
def __init__(self, client):
self._client = client
2006-12-01 09:54:28 +00:00
2007-03-30 03:19:52 +00:00
def set_params(self, needed_shares, shares_of_happiness, total_shares):
self.needed_shares = needed_shares
self.shares_of_happiness = shares_of_happiness
self.total_shares = total_shares
def set_filehandle(self, filehandle):
self._filehandle = filehandle
filehandle.seek(0, 2)
self._size = filehandle.tell()
filehandle.seek(0)
2006-12-01 09:54:28 +00:00
def set_verifierid(self, vid):
assert isinstance(vid, str)
assert len(vid) == 20
2006-12-01 09:54:28 +00:00
self._verifierid = vid
def start(self):
"""Start uploading the file.
The source of the data to be uploaded must have been set before this
point by calling set_filehandle().
This method returns a Deferred that will fire with the URI (a
string)."""
log.msg("starting upload [%s]" % (idlib.b2a(self._verifierid),))
if self.debug:
print "starting upload"
2007-03-30 03:19:52 +00:00
assert self.needed_shares
# create the encoder, so we can know how large the shares will be
2007-03-28 07:05:16 +00:00
self._encoder = self.ENCODERCLASS()
self._last_seg_encoder = self.ENCODERCLASS() # This one is for encoding the final segment, which might be shorter than the others.
2007-01-17 04:29:59 +00:00
self._codec_name = self._encoder.get_encoder_type()
self._encoder.set_params(self.segment_size, self.needed_shares, self.total_shares)
xyz
2007-03-30 03:19:52 +00:00
paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
self._block_size = self._encoder.get_block_size()
2006-12-01 11:06:11 +00:00
# first step: who should we upload to?
2007-03-30 03:19:52 +00:00
peers = self._client.get_permuted_peers(self._verifierid)
assert peers
trackers = [ (permutedid, PeerTracker(peerid, conn, self._share_size, self._block_size, self._verifierid),)
2007-03-30 03:19:52 +00:00
for permutedid, peerid, conn in peers ]
ring_things = [] # a list of (position_in_ring, whatami, x) where whatami is 0 if x is a sharenum or else 1 if x is a PeerTracker instance
ring_things.extend([ (permutedpeerid, 1, peer,) for permutedpeerid, peer in trackers ])
shares = [ (i * 2**160 / self.total_shares, 0, i) for i in range(self.total_shares) ]
ring_things.extend(shares)
ring_things.sort()
self.ring_things = collections.deque(ring_things)
self.usable_peers = set([peer for permutedid, peer in trackers])
self.used_peers = set()
self.unallocated_sharenums = set(shares)
d = self._locate_all_shareholders()
d.addCallback(self._send_shares)
d.addCallback(self._compute_uri)
2006-12-01 09:54:28 +00:00
return d
2007-03-30 03:19:52 +00:00
def _locate_all_shareholders(self):
"""
@return: a set of PeerTracker instances that have agreed to hold some
shares for us
"""
d = self._query_peers()
def _done(res):
if not self.unallocated_sharenums:
return self._used_peers
if not self.usable_peers:
if len(self.unallocated_sharenums) < (self.total_shares - self.shares_of_happiness):
# close enough
return self._used_peers
raise NotEnoughPeersError
return self._query_peers()
d.addCallback(_done)
return d
2007-03-30 03:19:52 +00:00
def _query_peers(self):
"""
@return: a deferred that fires when all queries have resolved
"""
# Choose a random starting point, talk to that peer.
self.ring_things.rotate(random.randrange(0, len(self.ring_things)))
# Walk backwards to find a peer. We know that we'll eventually find
# one because we earlier asserted that there was at least one.
while self.ring_things[0][1] != 1:
self.ring_things.rotate(-1)
startingpoint = self.ring_things[0]
peer = startingpoint[2]
assert isinstance(peer, PeerTracker), peer
self.ring_things.rotate(-1)
# loop invariant: at the top of the loop, we are always one step to
# the left of a peer, which is stored in the peer variable.
outstanding_queries = []
while self.ring_things[0] != startingpoint:
# Walk backwards to find the previous peer (could be the same one).
# Accumulate all shares that we find along the way.
sharenums_to_query = set()
while self.ring_things[0][1] != 1:
sharenums_to_query.add(self.ring_things[0][2])
self.ring_things.rotate(-1)
d = peer.query(sharenums_to_query)
d.addCallbacks(self._got_response, self._got_error, callbackArgs=(peer, sharenums_to_query), errbackArgs=(peer,))
outstanding_queries.append(d)
peer = self.ring_things[0][2]
assert isinstance(peer, PeerTracker), peer
self.ring_things.rotate(-1)
return defer.DeferredList(outstanding_queries)
def _got_response(self, (alreadygot, allocated), peer, shares_we_requested):
"""
@type alreadygot: a set of sharenums
@type allocated: a set of sharenums
"""
self.unallocated_sharenums -= alreadygot
self.unallocated_sharenums -= allocated
if allocated:
self.used_peers.add(peer)
if shares_we_requested - alreadygot - allocated:
# Then he didn't accept some of the shares, so he's full.
self.usable_peers.remove(peer)
def _got_error(self, f, peer):
self.usable_peers -= peer
def _send_shares(self, used_peers):
buckets = {}
for peer in used_peers:
buckets.update(peer.buckets)
assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
self._encoder.set_shareholders(buckets)
return self._encoder.start()
def _compute_uri(self, roothash):
params = self._encoder.get_serialized_params()
return pack_uri(self._codec_name, params, self._verifierid, roothash, self.needed_shares, self.total_shares, self._size, self._encoder.segment_size)
2006-12-01 09:54:28 +00:00
def netstring(s):
return "%d:%s," % (len(s), s)
class FileName:
implements(IUploadable)
def __init__(self, filename):
self._filename = filename
def get_filehandle(self):
return open(self._filename, "rb")
def close_filehandle(self, f):
f.close()
class Data:
implements(IUploadable)
def __init__(self, data):
self._data = data
def get_filehandle(self):
return StringIO(self._data)
def close_filehandle(self, f):
pass
class FileHandle:
implements(IUploadable)
def __init__(self, filehandle):
self._filehandle = filehandle
def get_filehandle(self):
return self._filehandle
def close_filehandle(self, f):
# the originator of the filehandle reserves the right to close it
pass
class Uploader(service.MultiService):
"""I am a service that allows file uploading.
"""
implements(IUploader)
name = "uploader"
uploader_class = FileUploader
debug = False
2007-03-30 03:19:52 +00:00
needed_shares = 25 # Number of shares required to reconstruct a file.
desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many.
total_shares = 100 # Total number of shares created by encoding. If everybody has room then this is is how many we will upload.
2006-12-03 03:31:43 +00:00
def _compute_verifierid(self, f):
hasher = sha.new(netstring("allmydata_v1_verifierid"))
f.seek(0)
data = f.read()
hasher.update(data)#f.read())
f.seek(0)
# note: this is only of the plaintext data, no encryption yet
return hasher.digest()
def upload(self, f):
# this returns the URI
assert self.parent
assert self.running
f = IUploadable(f)
fh = f.get_filehandle()
u = self.uploader_class(self.parent)
if self.debug:
u.debug = True
u.set_filehandle(fh)
# push two shares, require that we get two back. TODO: this is
# temporary, of course.
2007-03-30 03:19:52 +00:00
u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
u.set_verifierid(self._compute_verifierid(fh))
d = u.start()
def _done(res):
f.close_filehandle(fh)
return res
d.addBoth(_done)
return d
# utility functions
def upload_data(self, data):
return self.upload(Data(data))
def upload_filename(self, filename):
return self.upload(FileName(filename))
def upload_filehandle(self, filehandle):
return self.upload(FileHandle(filehandle))