2007-01-21 22:01:34 +00:00
|
|
|
from zope.interface import implements
|
2007-03-30 03:19:52 +00:00
|
|
|
from twisted.python import log
|
2006-12-01 09:54:28 +00:00
|
|
|
from twisted.internet import defer
|
2006-12-03 01:27:18 +00:00
|
|
|
from twisted.application import service
|
2006-12-04 02:07:41 +00:00
|
|
|
from foolscap import Referenceable
|
2006-12-03 01:27:18 +00:00
|
|
|
|
2007-03-30 21:54:33 +00:00
|
|
|
from allmydata.util import idlib
|
2007-04-06 04:17:42 +00:00
|
|
|
from allmydata import encode
|
2007-01-17 04:29:59 +00:00
|
|
|
from allmydata.uri import pack_uri
|
2007-01-21 22:01:34 +00:00
|
|
|
from allmydata.interfaces import IUploadable, IUploader
|
2006-12-01 09:54:28 +00:00
|
|
|
|
2006-12-03 03:31:43 +00:00
|
|
|
from cStringIO import StringIO
|
2007-03-30 03:19:52 +00:00
|
|
|
import collections, random, sha
|
2006-12-03 03:31:43 +00:00
|
|
|
|
2006-12-01 09:54:28 +00:00
|
|
|
class NotEnoughPeersError(Exception):
    """Raised when peer selection runs out of usable peers before enough
    shares have been given a home."""
|
|
|
|
|
|
|
|
class HaveAllPeersError(Exception):
    """Control-flow exception: we use this to jump out of the loop."""
|
|
|
|
|
|
|
|
# this wants to live in storage, not here
class TooFullError(Exception):
    """Error type for a storage peer that is too full.

    NOTE(review): nothing in this module raises it; presumably it is meant
    for the storage side -- confirm before relying on it.
    """
|
|
|
|
|
2007-03-30 03:19:52 +00:00
|
|
|
class PeerTracker:
    """Bookkeeping for one candidate peer during an upload.

    Remembers the peer's identity, the connection used to reach it, the
    share/block sizes and verifierid to ask for, and the bucket writers the
    peer has handed back so far.
    """

    def __init__(self, peerid, permutedid, connection, sharesize, blocksize, verifierid):
        """
        @param peerid: identifier of the peer
        @param permutedid: the peer's position in the permuted ring (used by
                           FileUploader as a sort key)
        @param connection: remote reference to the peer (an RIClient)
        @param sharesize: share size passed to allocate_buckets
        @param blocksize: block size passed to allocate_buckets
        @param verifierid: verifierid passed to allocate_buckets
        """
        self.peerid = peerid
        self.permutedid = permutedid
        self.connection = connection # to an RIClient
        self.buckets = {} # k: shareid, v: IRemoteBucketWriter
        self.sharesize = sharesize
        self.blocksize = blocksize
        self.verifierid = verifierid
        # filled in lazily by the first query()
        self._storageserver = None

    def query(self, sharenums):
        """Ask the peer to allocate buckets for the given share numbers.

        On first use this fetches the peer's "storageserver" service and
        caches it; later calls reuse the cached reference.

        @return: a Deferred that fires with (alreadygot, allocated), as
                 produced by _got_reply
        """
        if not self._storageserver:
            d = self.connection.callRemote("get_service", "storageserver")
            d.addCallback(self._got_storageserver)
            d.addCallback(lambda res: self._query(sharenums))
            return d
        return self._query(sharenums)

    def _got_storageserver(self, storageserver):
        """Cache the remote storageserver reference for later queries."""
        self._storageserver = storageserver

    def _query(self, sharenums):
        """Send allocate_buckets to the cached storageserver.

        @return: a Deferred that fires with the result of _got_reply
        """
        d = self._storageserver.callRemote("allocate_buckets", self.verifierid,
                                           sharenums, self.sharesize,
                                           self.blocksize, canary=Referenceable())
        d.addCallback(self._got_reply)
        return d

    def _got_reply(self, res):
        """Record the buckets the peer allocated.

        @param res: the (alreadygot, buckets) pair returned by
                    allocate_buckets; buckets maps sharenum to bucket writer
        @return: (alreadygot, set of sharenums that were just allocated)
        """
        # Unpacked here rather than in the signature: tuple parameters were
        # removed from the language by PEP 3113.
        alreadygot, buckets = res
        #log.msg("%s._got_reply(%s)" % (self, (alreadygot, buckets)))
        self.buckets.update(buckets)
        return (alreadygot, set(buckets.keys()))
|
2007-01-16 04:22:22 +00:00
|
|
|
|
2006-12-03 01:27:18 +00:00
|
|
|
class FileUploader:
    """Upload a single file.

    This class is responsible for locating the shareholders (peers willing
    to hold shares); the encoder it creates is responsible for handling the
    data and sending out the shares.

    Usage: construct, then call set_filehandle(), set_params() and
    set_verifierid(), then start().
    """

    def __init__(self, client, options=None):
        """
        @param client: object providing get_permuted_peers(verifierid)
        @param options: options handed to encode.Encoder; defaults to a
                        fresh empty dict
        """
        self._client = client
        # Build the default per instance so uploaders never share one dict.
        if options is None:
            options = {}
        self._options = options

    def set_params(self, needed_shares, shares_of_happiness, total_shares):
        """Set the encoding/placement parameters.

        @param needed_shares: stored and later packed into the URI
        @param shares_of_happiness: minimum number of shares that must be
                                    placed for the upload to proceed
        @param total_shares: number of share numbers to find homes for
        """
        self.needed_shares = needed_shares
        self.shares_of_happiness = shares_of_happiness
        self.total_shares = total_shares

    def set_filehandle(self, filehandle):
        """Set the data source and record its size.

        The handle must be seekable; it is left positioned at offset 0.
        """
        self._filehandle = filehandle
        filehandle.seek(0, 2) # seek to the end to measure the size
        self._size = filehandle.tell()
        filehandle.seek(0)

    def set_verifierid(self, vid):
        """Set the verifierid, which must be a 20-byte string."""
        assert isinstance(vid, str)
        assert len(vid) == 20
        self._verifierid = vid

    def start(self):
        """Start uploading the file.

        The source of the data to be uploaded must have been set before this
        point by calling set_filehandle().

        This method returns a Deferred that will fire with the URI (a
        string)."""
        log.msg("starting upload [%s]" % (idlib.b2a(self._verifierid),))
        assert self.needed_shares

        # create the encoder, so we can know how large the shares will be
        self._encoder = encode.Encoder(self._options)
        self._encoder.setup(self._filehandle)
        share_size = self._encoder.get_share_size()
        block_size = self._encoder.get_block_size()

        # we are responsible for locating the shareholders. self._encoder is
        # responsible for handling the data and sending out the shares.
        peers = self._client.get_permuted_peers(self._verifierid)
        assert peers
        trackers = [ PeerTracker(peerid, permutedid, conn, share_size, block_size, self._verifierid)
                     for permutedid, peerid, conn in peers ]
        self.usable_peers = set(trackers) # this set shrinks over time
        self.used_peers = set() # while this set grows
        self.unallocated_sharenums = set(range(self.total_shares)) # this one shrinks

        d = self._locate_all_shareholders()
        d.addCallback(self._send_shares)
        d.addCallback(self._compute_uri)
        return d

    def _locate_all_shareholders(self):
        """
        @return: a Deferred that fires with a set of PeerTracker instances
                 that have agreed to hold some shares for us
        """
        return self._locate_more_shareholders()

    def _locate_more_shareholders(self):
        """Run one round of peer queries, then decide whether to go again."""
        d = self._query_peers()
        d.addCallback(self._located_some_shareholders)
        return d

    def _located_some_shareholders(self, res):
        """Decide what to do after a round of queries has resolved.

        @return: self.used_peers when every share has a home, or when we ran
                 out of usable peers but placed at least shares_of_happiness
                 shares; otherwise the Deferred for another round
        @raise NotEnoughPeersError: if no usable peers remain and fewer than
                                    shares_of_happiness shares were placed
        """
        log.msg("_located_some_shareholders")
        log.msg(" still need homes for %d shares, still have %d usable peers" % (len(self.unallocated_sharenums), len(self.usable_peers)))
        if not self.unallocated_sharenums:
            # Finished allocating places for all shares.
            log.msg("%s._locate_all_shareholders() Finished allocating places for all shares." % self)
            log.msg("used_peers is %s" % (self.used_peers,))
            return self.used_peers
        if not self.usable_peers:
            # Ran out of peers who have space.
            log.msg("%s._locate_all_shareholders() Ran out of peers who have space." % self)
            placed_shares = self.total_shares - len(self.unallocated_sharenums)
            # "at least shares_of_happiness": placing exactly that many is
            # good enough.
            if placed_shares >= self.shares_of_happiness:
                # But we allocated places for enough shares.
                log.msg("%s._locate_all_shareholders() But we allocated places for enough shares." % self)
                return self.used_peers
            raise NotEnoughPeersError
        # we need to keep trying
        return self._locate_more_shareholders()

    def _create_ring_of_things(self):
        """Build the sorted ring of usable peers and unallocated shares.

        @return: a collections.deque of (position_in_ring, whatami, x)
                 tuples in ascending order, where whatami is SHARE (0) if x
                 is a sharenum or PEER (1) if x is a PeerTracker instance
        """
        PEER = 1 # must sort later than SHARE, for consistency with download
        SHARE = 0
        ring_of_things = [ (peer.permutedid, PEER, peer,)
                           for peer in self.usable_peers ]
        # Shares are spread evenly over the 2**160 ring. '//' keeps the
        # positions integral regardless of the division semantics in effect.
        ring_of_things.extend([ (i * 2**160 // self.total_shares, SHARE, i)
                                for i in self.unallocated_sharenums ])
        ring_of_things.sort()
        return collections.deque(ring_of_things)

    def _query_peers(self):
        """Ask each usable peer to hold the shares that follow it in the ring.

        @return: a deferred that fires when all queries have resolved
        """
        PEER = 1
        SHARE = 0
        ring = self._create_ring_of_things()

        # Choose a random starting point, talk to that peer.
        ring.rotate(random.randrange(0, len(ring)))

        # Advance around the ring until a peer is at the front. We know that
        # we'll eventually find one because callers only get here while
        # there is at least one usable peer.
        while ring[0][1] != PEER:
            ring.rotate(-1)
        peer = ring[0][2]
        assert isinstance(peer, PeerTracker), peer
        ring.rotate(-1)

        # loop invariant: at the top of the loop, we are always one step
        # past a peer, which is stored in the peer variable. Every share seen
        # before the next peer is requested from that stored peer, so one
        # full trip around the ring queries each peer exactly once.
        outstanding_queries = []
        sharenums_to_query = set()
        for _ in range(len(ring)):
            position, whatami, thing = ring[0]
            if whatami == SHARE:
                sharenums_to_query.add(thing)
            else:
                d = peer.query(sharenums_to_query)
                d.addCallbacks(self._got_response, self._got_error, callbackArgs=(peer, sharenums_to_query), errbackArgs=(peer,))
                outstanding_queries.append(d)
                d.addErrback(log.err)
                peer = thing
                sharenums_to_query = set()
            ring.rotate(-1)

        return defer.DeferredList(outstanding_queries)

    def _got_response(self, res, peer, shares_we_requested):
        """Update our bookkeeping with one peer's answer.

        @param res: the (alreadygot, allocated) pair from PeerTracker.query
        @type alreadygot: a set of sharenums
        @type allocated: a set of sharenums
        @param peer: the PeerTracker that answered
        @param shares_we_requested: the set of sharenums we asked it to hold
        """
        # Unpacked here rather than in the signature: tuple parameters were
        # removed from the language by PEP 3113.
        alreadygot, allocated = res
        # TODO: some future version of Foolscap might not convert inbound
        # sets into sets.Set on us, even when we're using 2.4
        alreadygot = set(alreadygot)
        allocated = set(allocated)
        #log.msg("%s._got_response(%s, %s, %s): self.unallocated_sharenums: %s, unhandled: %s" % (self, (alreadygot, allocated), peer, shares_we_requested, self.unallocated_sharenums, shares_we_requested - alreadygot - allocated))
        self.unallocated_sharenums -= alreadygot
        self.unallocated_sharenums -= allocated

        if allocated:
            self.used_peers.add(peer)

        if shares_we_requested - alreadygot - allocated:
            #log.msg("%s._got_response(%s, %s, %s): self.unallocated_sharenums: %s, unhandled: %s HE'S FULL" % (self, (alreadygot, allocated), peer, shares_we_requested, self.unallocated_sharenums, shares_we_requested - alreadygot - allocated))
            # Then he didn't accept some of the shares, so he's full.
            self.usable_peers.remove(peer)

    def _got_error(self, f, peer):
        """A query failed: log it and stop considering that peer."""
        log.msg("%s._got_error(%s, %s)" % (self, f, peer,))
        self.usable_peers.remove(peer)

    def _send_shares(self, used_peers):
        """Hand all allocated buckets to the encoder and start encoding.

        @param used_peers: a sequence of PeerTracker objects
        @return: whatever self._encoder.start() returns
        """
        log.msg("_send_shares, used_peers is %s" % (used_peers,))
        for peer in used_peers:
            assert isinstance(peer, PeerTracker)
        buckets = {}
        for peer in used_peers:
            buckets.update(peer.buckets)
        # no sharenum may have been allocated by more than one peer
        assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
        self._encoder.set_shareholders(buckets)
        return self._encoder.start()

    def _compute_uri(self, roothash):
        """Pack the codec parameters, verifierid, roothash, share counts,
        file size and segment size into a URI via pack_uri()."""
        codec_type = self._encoder._codec.get_encoder_type()
        codec_params = self._encoder._codec.get_serialized_params()
        tail_codec_params = self._encoder._tail_codec.get_serialized_params()
        return pack_uri(codec_type, codec_params, tail_codec_params,
                        self._verifierid,
                        roothash, self.needed_shares, self.total_shares,
                        self._size, self._encoder.segment_size)
|
2006-12-01 09:54:28 +00:00
|
|
|
|
2007-01-16 04:22:22 +00:00
|
|
|
|
2006-12-03 01:27:18 +00:00
|
|
|
def netstring(s):
    """Return s framed as '<length>:<s>,'."""
    length = len(s)
    framed = "%d:%s," % (length, s)
    return framed
|
|
|
|
|
2006-12-04 02:07:41 +00:00
|
|
|
class FileName:
    """IUploadable whose data lives in a file named by a path."""
    implements(IUploadable)

    def __init__(self, filename):
        self._filename = filename

    def get_filehandle(self):
        """Open the named file for binary reading and return the handle."""
        handle = open(self._filename, "rb")
        return handle

    def close_filehandle(self, f):
        """Close a handle previously returned by get_filehandle()."""
        f.close()
|
|
|
|
|
|
|
|
class Data:
    """IUploadable whose data is a string held in memory."""
    implements(IUploadable)

    def __init__(self, data):
        self._data = data

    def get_filehandle(self):
        """Return a new StringIO wrapping the stored data."""
        handle = StringIO(self._data)
        return handle

    def close_filehandle(self, f):
        """Do nothing; the in-memory handle is simply dropped."""
        return None
|
|
|
|
|
|
|
|
class FileHandle:
    """IUploadable wrapping a filehandle supplied by the caller."""
    implements(IUploadable)

    def __init__(self, filehandle):
        self._filehandle = filehandle

    def get_filehandle(self):
        """Return the very handle that was passed to the constructor."""
        handle = self._filehandle
        return handle

    def close_filehandle(self, f):
        """Do nothing.

        The originator of the filehandle reserves the right to close it.
        """
        return None
|
|
|
|
|
2006-12-03 01:27:18 +00:00
|
|
|
class Uploader(service.MultiService):
    """I am a service that allows file uploading.

    Each upload is delegated to a fresh instance of uploader_class, which is
    configured with the share parameters defined on this class.
    """
    implements(IUploader)
    name = "uploader"
    uploader_class = FileUploader

    needed_shares = 25 # Number of shares required to reconstruct a file.
    desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many.
    total_shares = 100 # Total number of shares created by encoding. If everybody has room then this is how many we will upload.

    # Number of bytes read per step while hashing, so that the whole file
    # never has to be held in memory at once.
    _hash_chunk_size = 64 * 1024

    def _compute_verifierid(self, f):
        """Return the SHA-1 digest of the file's contents, prefixed with the
        netstring tag "allmydata_v1_verifierid".

        @param f: a seekable filehandle; it is rewound to offset 0 both
                  before hashing and afterwards
        """
        hasher = sha.new(netstring("allmydata_v1_verifierid"))
        f.seek(0)
        while True:
            chunk = f.read(self._hash_chunk_size)
            if not chunk:
                break
            hasher.update(chunk)
        f.seek(0)
        # note: this is only of the plaintext data, no encryption yet
        return hasher.digest()

    def upload(self, f, options=None):
        """Upload the given uploadable.

        @param f: anything adaptable to IUploadable
        @param options: options passed to uploader_class; defaults to a
                        fresh empty dict
        @return: the Deferred returned by the uploader's start() (this
                 returns the URI); the filehandle is released via
                 close_filehandle() once it fires, on success or failure
        """
        assert self.parent
        assert self.running
        if options is None:
            options = {}
        f = IUploadable(f)
        fh = f.get_filehandle()
        try:
            u = self.uploader_class(self.parent, options)
            u.set_filehandle(fh)
            u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
            u.set_verifierid(self._compute_verifierid(fh))
            d = u.start()
        except:
            # Setup failed before any Deferred existed to own the cleanup:
            # release the handle ourselves, then let the error propagate.
            f.close_filehandle(fh)
            raise
        def _done(res):
            f.close_filehandle(fh)
            return res
        d.addBoth(_done)
        return d

    # utility functions
    def upload_data(self, data, options=None):
        """Upload an in-memory string; see upload()."""
        return self.upload(Data(data), options)

    def upload_filename(self, filename, options=None):
        """Upload the file at the given path; see upload()."""
        return self.upload(FileName(filename), options)

    def upload_filehandle(self, filehandle, options=None):
        """Upload from an already-open filehandle; see upload()."""
        return self.upload(FileHandle(filehandle), options)
|