2007-07-20 01:21:44 +00:00
|
|
|
|
|
|
|
import os
|
2007-01-21 22:01:34 +00:00
|
|
|
from zope.interface import implements
|
2008-01-10 03:25:50 +00:00
|
|
|
from twisted.python import failure
|
2006-12-01 09:54:28 +00:00
|
|
|
from twisted.internet import defer
|
2006-12-03 01:27:18 +00:00
|
|
|
from twisted.application import service
|
2006-12-04 02:07:41 +00:00
|
|
|
from foolscap import Referenceable
|
2008-01-25 04:51:34 +00:00
|
|
|
from foolscap import eventual
|
2008-01-10 03:25:50 +00:00
|
|
|
from foolscap.logging import log
|
2006-12-03 01:27:18 +00:00
|
|
|
|
2007-08-28 02:00:18 +00:00
|
|
|
from allmydata.util.hashutil import file_renewal_secret_hash, \
|
|
|
|
file_cancel_secret_hash, bucket_renewal_secret_hash, \
|
|
|
|
bucket_cancel_secret_hash, plaintext_hasher, \
|
|
|
|
storage_index_chk_hash, plaintext_segment_hasher, key_hasher
|
2007-07-21 22:40:36 +00:00
|
|
|
from allmydata import encode, storage, hashtree, uri
|
2007-09-16 08:24:07 +00:00
|
|
|
from allmydata.util import idlib, mathutil
|
2007-12-20 00:55:28 +00:00
|
|
|
from allmydata.util.assertutil import precondition
|
2008-01-10 04:25:47 +00:00
|
|
|
from allmydata.interfaces import IUploadable, IUploader, \
|
|
|
|
IEncryptedUploadable, RIEncryptedUploadable
|
2007-12-04 00:27:46 +00:00
|
|
|
from pycryptopp.cipher.aes import AES
|
2006-12-01 09:54:28 +00:00
|
|
|
|
2006-12-03 03:31:43 +00:00
|
|
|
from cStringIO import StringIO
|
|
|
|
|
2007-07-12 20:22:36 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
KiB=1024
|
|
|
|
MiB=1024*KiB
|
|
|
|
GiB=1024*MiB
|
|
|
|
TiB=1024*GiB
|
|
|
|
PiB=1024*TiB
|
|
|
|
|
2006-12-01 09:54:28 +00:00
|
|
|
class HaveAllPeersError(Exception):
|
|
|
|
# we use this to jump out of the loop
|
|
|
|
pass
|
|
|
|
|
|
|
|
# this wants to live in storage, not here
|
|
|
|
class TooFullError(Exception):
|
|
|
|
pass
|
|
|
|
|
2007-07-13 22:09:01 +00:00
|
|
|
# our current uri_extension is 846 bytes for small files, a few bytes
|
|
|
|
# more for larger ones (since the filesize is encoded in decimal in a
|
|
|
|
# few places). Ask for a little bit more just in case we need it. If
|
|
|
|
# the extension changes size, we can change EXTENSION_SIZE to
|
|
|
|
# allocate a more accurate amount of space.
|
|
|
|
EXTENSION_SIZE = 1000
|
|
|
|
|
2007-03-30 03:19:52 +00:00
|
|
|
class PeerTracker:
|
2007-06-10 03:46:04 +00:00
|
|
|
def __init__(self, peerid, permutedid, connection,
|
2007-07-13 21:04:49 +00:00
|
|
|
sharesize, blocksize, num_segments, num_share_hashes,
|
2007-08-28 02:00:18 +00:00
|
|
|
storage_index,
|
|
|
|
bucket_renewal_secret, bucket_cancel_secret):
|
2007-12-20 00:55:28 +00:00
|
|
|
precondition(isinstance(peerid, str), peerid)
|
|
|
|
precondition(len(peerid) == 20, peerid)
|
2007-03-30 03:19:52 +00:00
|
|
|
self.peerid = peerid
|
2007-03-30 21:54:33 +00:00
|
|
|
self.permutedid = permutedid
|
2007-03-30 23:50:50 +00:00
|
|
|
self.connection = connection # to an RIClient
|
2007-03-30 03:19:52 +00:00
|
|
|
self.buckets = {} # k: shareid, v: IRemoteBucketWriter
|
|
|
|
self.sharesize = sharesize
|
2007-07-13 22:09:01 +00:00
|
|
|
#print "PeerTracker", peerid, permutedid, sharesize
|
2007-07-14 00:25:45 +00:00
|
|
|
as = storage.allocated_size(sharesize,
|
|
|
|
num_segments,
|
|
|
|
num_share_hashes,
|
|
|
|
EXTENSION_SIZE)
|
2007-07-13 22:09:01 +00:00
|
|
|
self.allocated_size = as
|
2007-11-01 22:22:47 +00:00
|
|
|
|
2007-03-30 03:19:52 +00:00
|
|
|
self.blocksize = blocksize
|
2007-07-13 21:04:49 +00:00
|
|
|
self.num_segments = num_segments
|
|
|
|
self.num_share_hashes = num_share_hashes
|
2007-08-28 00:28:51 +00:00
|
|
|
self.storage_index = storage_index
|
2007-03-30 23:50:50 +00:00
|
|
|
self._storageserver = None
|
2007-03-30 03:19:52 +00:00
|
|
|
|
2007-08-28 02:00:18 +00:00
|
|
|
self.renew_secret = bucket_renewal_secret
|
|
|
|
self.cancel_secret = bucket_cancel_secret
|
2007-08-28 00:28:51 +00:00
|
|
|
|
2007-09-16 08:24:07 +00:00
|
|
|
def __repr__(self):
|
|
|
|
return ("<PeerTracker for peer %s and SI %s>"
|
2007-12-20 00:55:28 +00:00
|
|
|
% (idlib.shortnodeid_b2a(self.peerid),
|
2007-09-16 08:24:07 +00:00
|
|
|
idlib.b2a(self.storage_index)[:6]))
|
|
|
|
|
2007-03-30 03:19:52 +00:00
|
|
|
def query(self, sharenums):
|
2007-03-30 23:50:50 +00:00
|
|
|
if not self._storageserver:
|
|
|
|
d = self.connection.callRemote("get_service", "storageserver")
|
|
|
|
d.addCallback(self._got_storageserver)
|
|
|
|
d.addCallback(lambda res: self._query(sharenums))
|
|
|
|
return d
|
|
|
|
return self._query(sharenums)
|
|
|
|
def _got_storageserver(self, storageserver):
|
|
|
|
self._storageserver = storageserver
|
|
|
|
def _query(self, sharenums):
|
2007-07-13 22:09:01 +00:00
|
|
|
#print " query", self.peerid, len(sharenums)
|
2007-06-10 03:46:04 +00:00
|
|
|
d = self._storageserver.callRemote("allocate_buckets",
|
2007-08-28 00:28:51 +00:00
|
|
|
self.storage_index,
|
|
|
|
self.renew_secret,
|
|
|
|
self.cancel_secret,
|
2007-07-13 22:09:01 +00:00
|
|
|
sharenums,
|
|
|
|
self.allocated_size,
|
2007-06-10 03:46:04 +00:00
|
|
|
canary=Referenceable())
|
2007-03-30 03:19:52 +00:00
|
|
|
d.addCallback(self._got_reply)
|
|
|
|
return d
|
2007-11-01 22:22:47 +00:00
|
|
|
|
2007-03-30 03:19:52 +00:00
|
|
|
def _got_reply(self, (alreadygot, buckets)):
|
2007-04-18 03:25:52 +00:00
|
|
|
#log.msg("%s._got_reply(%s)" % (self, (alreadygot, buckets)))
|
2007-07-13 21:04:49 +00:00
|
|
|
b = {}
|
|
|
|
for sharenum, rref in buckets.iteritems():
|
2007-07-14 00:25:45 +00:00
|
|
|
bp = storage.WriteBucketProxy(rref, self.sharesize,
|
|
|
|
self.blocksize,
|
|
|
|
self.num_segments,
|
|
|
|
self.num_share_hashes,
|
2008-01-29 01:53:51 +00:00
|
|
|
EXTENSION_SIZE,
|
|
|
|
self.peerid)
|
2007-07-13 21:04:49 +00:00
|
|
|
b[sharenum] = bp
|
2007-07-09 06:27:46 +00:00
|
|
|
self.buckets.update(b)
|
|
|
|
return (alreadygot, set(b.keys()))
|
2007-01-16 04:22:22 +00:00
|
|
|
|
2007-09-16 08:24:07 +00:00
|
|
|
class Tahoe2PeerSelector:
|
|
|
|
|
2008-01-15 04:19:20 +00:00
|
|
|
def __init__(self, upload_id, logparent=None):
|
2007-09-16 08:24:07 +00:00
|
|
|
self.upload_id = upload_id
|
|
|
|
self.query_count, self.good_query_count, self.bad_query_count = 0,0,0
|
|
|
|
self.error_count = 0
|
|
|
|
self.num_peers_contacted = 0
|
|
|
|
self.last_failure_msg = None
|
2008-01-15 04:19:20 +00:00
|
|
|
self._log_parent = log.msg("%s starting" % self, parent=logparent)
|
2007-09-16 08:24:07 +00:00
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
return "<Tahoe2PeerSelector for upload %s>" % self.upload_id
|
|
|
|
|
|
|
|
def get_shareholders(self, client,
|
|
|
|
storage_index, share_size, block_size,
|
|
|
|
num_segments, total_shares, shares_of_happiness,
|
|
|
|
push_to_ourselves):
|
|
|
|
"""
|
|
|
|
@return: a set of PeerTracker instances that have agreed to hold some
|
|
|
|
shares for us
|
|
|
|
"""
|
|
|
|
|
|
|
|
self.total_shares = total_shares
|
|
|
|
self.shares_of_happiness = shares_of_happiness
|
|
|
|
|
|
|
|
self.homeless_shares = range(total_shares)
|
|
|
|
# self.uncontacted_peers = list() # peers we haven't asked yet
|
2007-09-17 00:08:34 +00:00
|
|
|
self.contacted_peers = [] # peers worth asking again
|
|
|
|
self.contacted_peers2 = [] # peers that we have asked again
|
2008-01-15 04:19:20 +00:00
|
|
|
self._started_second_pass = False
|
2007-09-16 08:24:07 +00:00
|
|
|
self.use_peers = set() # PeerTrackers that have shares assigned to them
|
|
|
|
self.preexisting_shares = {} # sharenum -> PeerTracker holding the share
|
|
|
|
|
|
|
|
peers = client.get_permuted_peers(storage_index, push_to_ourselves)
|
|
|
|
if not peers:
|
|
|
|
raise encode.NotEnoughPeersError("client gave us zero peers")
|
|
|
|
|
|
|
|
# figure out how much space to ask for
|
|
|
|
|
|
|
|
# this needed_hashes computation should mirror
|
|
|
|
# Encoder.send_all_share_hash_trees. We use an IncompleteHashTree
|
|
|
|
# (instead of a HashTree) because we don't require actual hashing
|
|
|
|
# just to count the levels.
|
|
|
|
ht = hashtree.IncompleteHashTree(total_shares)
|
|
|
|
num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
|
|
|
|
|
|
|
|
# decide upon the renewal/cancel secrets, to include them in the
|
|
|
|
# allocat_buckets query.
|
|
|
|
client_renewal_secret = client.get_renewal_secret()
|
|
|
|
client_cancel_secret = client.get_cancel_secret()
|
|
|
|
|
|
|
|
file_renewal_secret = file_renewal_secret_hash(client_renewal_secret,
|
|
|
|
storage_index)
|
|
|
|
file_cancel_secret = file_cancel_secret_hash(client_cancel_secret,
|
|
|
|
storage_index)
|
|
|
|
|
|
|
|
trackers = [ PeerTracker(peerid, permutedid, conn,
|
|
|
|
share_size, block_size,
|
|
|
|
num_segments, num_share_hashes,
|
|
|
|
storage_index,
|
|
|
|
bucket_renewal_secret_hash(file_renewal_secret,
|
|
|
|
peerid),
|
|
|
|
bucket_cancel_secret_hash(file_cancel_secret,
|
|
|
|
peerid),
|
|
|
|
)
|
|
|
|
for permutedid, peerid, conn in peers ]
|
|
|
|
self.uncontacted_peers = trackers
|
|
|
|
|
|
|
|
d = defer.maybeDeferred(self._loop)
|
|
|
|
return d
|
|
|
|
|
|
|
|
def _loop(self):
|
|
|
|
if not self.homeless_shares:
|
|
|
|
# all done
|
|
|
|
msg = ("placed all %d shares, "
|
|
|
|
"sent %d queries to %d peers, "
|
|
|
|
"%d queries placed some shares, %d placed none, "
|
|
|
|
"got %d errors" %
|
|
|
|
(self.total_shares,
|
|
|
|
self.query_count, self.num_peers_contacted,
|
|
|
|
self.good_query_count, self.bad_query_count,
|
|
|
|
self.error_count))
|
2008-01-15 04:19:20 +00:00
|
|
|
log.msg("peer selection successful for %s: %s" % (self, msg),
|
|
|
|
parent=self._log_parent)
|
2007-09-16 08:24:07 +00:00
|
|
|
return self.use_peers
|
|
|
|
|
|
|
|
if self.uncontacted_peers:
|
|
|
|
peer = self.uncontacted_peers.pop(0)
|
|
|
|
# TODO: don't pre-convert all peerids to PeerTrackers
|
|
|
|
assert isinstance(peer, PeerTracker)
|
|
|
|
|
|
|
|
shares_to_ask = set([self.homeless_shares.pop(0)])
|
|
|
|
self.query_count += 1
|
|
|
|
self.num_peers_contacted += 1
|
|
|
|
d = peer.query(shares_to_ask)
|
2007-09-17 00:08:34 +00:00
|
|
|
d.addBoth(self._got_response, peer, shares_to_ask,
|
|
|
|
self.contacted_peers)
|
2007-09-16 08:24:07 +00:00
|
|
|
return d
|
2007-09-17 00:08:34 +00:00
|
|
|
elif self.contacted_peers:
|
2007-09-16 08:24:07 +00:00
|
|
|
# ask a peer that we've already asked.
|
2008-01-15 04:19:20 +00:00
|
|
|
if not self._started_second_pass:
|
|
|
|
log.msg("starting second pass", parent=self._log_parent,
|
|
|
|
level=log.NOISY)
|
|
|
|
self._started_second_pass = True
|
2007-09-17 00:08:34 +00:00
|
|
|
num_shares = mathutil.div_ceil(len(self.homeless_shares),
|
|
|
|
len(self.contacted_peers))
|
2007-09-16 08:24:07 +00:00
|
|
|
peer = self.contacted_peers.pop(0)
|
2007-09-17 00:08:34 +00:00
|
|
|
shares_to_ask = set(self.homeless_shares[:num_shares])
|
|
|
|
self.homeless_shares[:num_shares] = []
|
2007-09-16 08:24:07 +00:00
|
|
|
self.query_count += 1
|
|
|
|
d = peer.query(shares_to_ask)
|
2007-09-17 00:08:34 +00:00
|
|
|
d.addBoth(self._got_response, peer, shares_to_ask,
|
|
|
|
self.contacted_peers2)
|
2007-09-16 08:24:07 +00:00
|
|
|
return d
|
2007-09-17 00:08:34 +00:00
|
|
|
elif self.contacted_peers2:
|
|
|
|
# we've finished the second-or-later pass. Move all the remaining
|
|
|
|
# peers back into self.contacted_peers for the next pass.
|
|
|
|
self.contacted_peers.extend(self.contacted_peers2)
|
|
|
|
self.contacted_peers[:] = []
|
|
|
|
return self._loop()
|
2007-09-16 08:24:07 +00:00
|
|
|
else:
|
|
|
|
# no more peers. If we haven't placed enough shares, we fail.
|
|
|
|
placed_shares = self.total_shares - len(self.homeless_shares)
|
|
|
|
if placed_shares < self.shares_of_happiness:
|
|
|
|
msg = ("placed %d shares out of %d total (%d homeless), "
|
|
|
|
"sent %d queries to %d peers, "
|
|
|
|
"%d queries placed some shares, %d placed none, "
|
|
|
|
"got %d errors" %
|
|
|
|
(self.total_shares - len(self.homeless_shares),
|
|
|
|
self.total_shares, len(self.homeless_shares),
|
|
|
|
self.query_count, self.num_peers_contacted,
|
|
|
|
self.good_query_count, self.bad_query_count,
|
|
|
|
self.error_count))
|
|
|
|
msg = "peer selection failed for %s: %s" % (self, msg)
|
|
|
|
if self.last_failure_msg:
|
|
|
|
msg += " (%s)" % (self.last_failure_msg,)
|
2008-01-15 04:19:20 +00:00
|
|
|
log.msg(msg, level=log.UNUSUAL, parent=self._log_parent)
|
2007-09-16 08:24:07 +00:00
|
|
|
raise encode.NotEnoughPeersError(msg)
|
|
|
|
else:
|
|
|
|
# we placed enough to be happy, so we're done
|
|
|
|
return self.use_peers
|
|
|
|
|
2007-09-17 00:08:34 +00:00
|
|
|
def _got_response(self, res, peer, shares_to_ask, put_peer_here):
|
2007-09-16 08:24:07 +00:00
|
|
|
if isinstance(res, failure.Failure):
|
|
|
|
# This is unusual, and probably indicates a bug or a network
|
|
|
|
# problem.
|
2008-01-15 04:19:20 +00:00
|
|
|
log.msg("%s got error during peer selection: %s" % (peer, res),
|
|
|
|
level=log.UNUSUAL, parent=self._log_parent)
|
2007-09-16 08:24:07 +00:00
|
|
|
self.error_count += 1
|
|
|
|
self.homeless_shares = list(shares_to_ask) + self.homeless_shares
|
2007-09-17 00:08:34 +00:00
|
|
|
if (self.uncontacted_peers
|
|
|
|
or self.contacted_peers
|
|
|
|
or self.contacted_peers2):
|
2007-09-16 08:24:07 +00:00
|
|
|
# there is still hope, so just loop
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
# No more peers, so this upload might fail (it depends upon
|
|
|
|
# whether we've hit shares_of_happiness or not). Log the last
|
|
|
|
# failure we got: if a coding error causes all peers to fail
|
|
|
|
# in the same way, this allows the common failure to be seen
|
|
|
|
# by the uploader and should help with debugging
|
|
|
|
msg = ("last failure (from %s) was: %s" % (peer, res))
|
|
|
|
self.last_failure_msg = msg
|
|
|
|
else:
|
|
|
|
(alreadygot, allocated) = res
|
2008-01-15 04:19:20 +00:00
|
|
|
log.msg("response from peer %s: alreadygot=%s, allocated=%s"
|
|
|
|
% (idlib.shortnodeid_b2a(peer.peerid),
|
|
|
|
tuple(sorted(alreadygot)), tuple(sorted(allocated))),
|
|
|
|
level=log.NOISY, parent=self._log_parent)
|
2007-09-16 08:24:07 +00:00
|
|
|
progress = False
|
|
|
|
for s in alreadygot:
|
|
|
|
self.preexisting_shares[s] = peer
|
|
|
|
if s in self.homeless_shares:
|
|
|
|
self.homeless_shares.remove(s)
|
|
|
|
progress = True
|
|
|
|
|
|
|
|
# the PeerTracker will remember which shares were allocated on
|
|
|
|
# that peer. We just have to remember to use them.
|
|
|
|
if allocated:
|
|
|
|
self.use_peers.add(peer)
|
|
|
|
progress = True
|
|
|
|
|
|
|
|
not_yet_present = set(shares_to_ask) - set(alreadygot)
|
|
|
|
still_homeless = not_yet_present - set(allocated)
|
|
|
|
|
|
|
|
if progress:
|
|
|
|
# they accepted or already had at least one share, so
|
|
|
|
# progress has been made
|
|
|
|
self.good_query_count += 1
|
|
|
|
else:
|
|
|
|
self.bad_query_count += 1
|
|
|
|
|
|
|
|
if still_homeless:
|
|
|
|
# In networks with lots of space, this is very unusual and
|
|
|
|
# probably indicates an error. In networks with peers that
|
|
|
|
# are full, it is merely unusual. In networks that are very
|
|
|
|
# full, it is common, and many uploads will fail. In most
|
|
|
|
# cases, this is obviously not fatal, and we'll just use some
|
|
|
|
# other peers.
|
|
|
|
|
|
|
|
# some shares are still homeless, keep trying to find them a
|
|
|
|
# home. The ones that were rejected get first priority.
|
|
|
|
self.homeless_shares = (list(still_homeless)
|
|
|
|
+ self.homeless_shares)
|
|
|
|
# Since they were unable to accept all of our requests, so it
|
|
|
|
# is safe to assume that asking them again won't help.
|
|
|
|
else:
|
|
|
|
# if they *were* able to accept everything, they might be
|
|
|
|
# willing to accept even more.
|
2007-09-17 00:08:34 +00:00
|
|
|
put_peer_here.append(peer)
|
2007-09-16 08:24:07 +00:00
|
|
|
|
|
|
|
# now loop
|
|
|
|
return self._loop()
|
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
class EncryptAnUploadable:
|
|
|
|
"""This is a wrapper that takes an IUploadable and provides
|
|
|
|
IEncryptedUploadable."""
|
|
|
|
implements(IEncryptedUploadable)
|
2008-01-25 00:25:33 +00:00
|
|
|
CHUNKSIZE = 50*1000
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
def __init__(self, original, default_encoding_parameters):
|
2008-01-25 00:25:33 +00:00
|
|
|
self.original = IUploadable(original)
|
2008-01-16 10:03:35 +00:00
|
|
|
assert isinstance(default_encoding_parameters, dict)
|
|
|
|
self._default_encoding_parameters = default_encoding_parameters
|
2007-07-24 02:31:53 +00:00
|
|
|
self._encryptor = None
|
2007-08-28 02:00:18 +00:00
|
|
|
self._plaintext_hasher = plaintext_hasher()
|
2007-07-24 02:31:53 +00:00
|
|
|
self._plaintext_segment_hasher = None
|
|
|
|
self._plaintext_segment_hashes = []
|
2008-01-16 10:03:35 +00:00
|
|
|
self._encoding_parameters = None
|
|
|
|
self._file_size = None
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2008-01-17 08:11:35 +00:00
|
|
|
def log(self, *args, **kwargs):
|
|
|
|
if "facility" not in kwargs:
|
|
|
|
kwargs["facility"] = "upload.encryption"
|
|
|
|
return log.msg(*args, **kwargs)
|
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
def get_size(self):
|
2008-01-16 10:03:35 +00:00
|
|
|
if self._file_size is not None:
|
|
|
|
return defer.succeed(self._file_size)
|
|
|
|
d = self.original.get_size()
|
|
|
|
def _got_size(size):
|
|
|
|
self._file_size = size
|
|
|
|
return size
|
|
|
|
d.addCallback(_got_size)
|
|
|
|
return d
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
def get_all_encoding_parameters(self):
|
|
|
|
if self._encoding_parameters is not None:
|
|
|
|
return defer.succeed(self._encoding_parameters)
|
|
|
|
d1 = self.get_size()
|
|
|
|
d2 = self.original.get_maximum_segment_size()
|
|
|
|
d3 = self.original.get_encoding_parameters()
|
|
|
|
d = defer.DeferredList([d1, d2, d3],
|
|
|
|
fireOnOneErrback=True, consumeErrors=True)
|
|
|
|
def _got_pieces(res):
|
|
|
|
file_size = res[0][1]
|
|
|
|
max_segsize = res[1][1]
|
|
|
|
params = res[2][1]
|
|
|
|
|
|
|
|
defaults = self._default_encoding_parameters
|
|
|
|
if max_segsize is None:
|
|
|
|
max_segsize = defaults["max_segment_size"]
|
|
|
|
|
|
|
|
if params is None:
|
|
|
|
k = defaults["k"]
|
|
|
|
happy = defaults["happy"]
|
|
|
|
n = defaults["n"]
|
|
|
|
else:
|
|
|
|
precondition(isinstance(params, tuple), params)
|
|
|
|
(k, happy, n) = params
|
|
|
|
|
|
|
|
# for small files, shrink the segment size to avoid wasting space
|
|
|
|
segsize = min(max_segsize, file_size)
|
|
|
|
# this must be a multiple of 'required_shares'==k
|
|
|
|
segsize = mathutil.next_multiple(segsize, k)
|
|
|
|
self._segment_size = segsize # used by segment hashers
|
|
|
|
self._encoding_parameters = (k, happy, n, segsize)
|
2008-01-17 08:11:35 +00:00
|
|
|
self.log("my encoding parameters: %s" %
|
|
|
|
(self._encoding_parameters,), level=log.NOISY)
|
2008-01-16 10:03:35 +00:00
|
|
|
return self._encoding_parameters
|
|
|
|
d.addCallback(_got_pieces)
|
|
|
|
return d
|
2007-07-24 02:31:53 +00:00
|
|
|
|
|
|
|
def _get_encryptor(self):
|
|
|
|
if self._encryptor:
|
|
|
|
return defer.succeed(self._encryptor)
|
|
|
|
|
|
|
|
d = self.original.get_encryption_key()
|
|
|
|
def _got(key):
|
2007-12-04 00:27:46 +00:00
|
|
|
e = AES(key)
|
2007-07-24 02:31:53 +00:00
|
|
|
self._encryptor = e
|
|
|
|
|
2007-08-28 02:00:18 +00:00
|
|
|
storage_index = storage_index_chk_hash(key)
|
2007-07-24 02:31:53 +00:00
|
|
|
assert isinstance(storage_index, str)
|
|
|
|
# There's no point to having the SI be longer than the key, so we
|
|
|
|
# specify that it is truncated to the same 128 bits as the AES key.
|
|
|
|
assert len(storage_index) == 16 # SHA-256 truncated to 128b
|
|
|
|
self._storage_index = storage_index
|
|
|
|
|
|
|
|
return e
|
|
|
|
d.addCallback(_got)
|
|
|
|
return d
|
|
|
|
|
|
|
|
def get_storage_index(self):
|
|
|
|
d = self._get_encryptor()
|
|
|
|
d.addCallback(lambda res: self._storage_index)
|
|
|
|
return d
|
|
|
|
|
|
|
|
def _get_segment_hasher(self):
|
|
|
|
p = self._plaintext_segment_hasher
|
|
|
|
if p:
|
|
|
|
left = self._segment_size - self._plaintext_segment_hashed_bytes
|
|
|
|
return p, left
|
2007-08-28 02:00:18 +00:00
|
|
|
p = plaintext_segment_hasher()
|
2007-07-24 02:31:53 +00:00
|
|
|
self._plaintext_segment_hasher = p
|
|
|
|
self._plaintext_segment_hashed_bytes = 0
|
|
|
|
return p, self._segment_size
|
|
|
|
|
|
|
|
def _update_segment_hash(self, chunk):
|
|
|
|
offset = 0
|
|
|
|
while offset < len(chunk):
|
|
|
|
p, segment_left = self._get_segment_hasher()
|
|
|
|
chunk_left = len(chunk) - offset
|
|
|
|
this_segment = min(chunk_left, segment_left)
|
|
|
|
p.update(chunk[offset:offset+this_segment])
|
|
|
|
self._plaintext_segment_hashed_bytes += this_segment
|
|
|
|
|
|
|
|
if self._plaintext_segment_hashed_bytes == self._segment_size:
|
|
|
|
# we've filled this segment
|
|
|
|
self._plaintext_segment_hashes.append(p.digest())
|
|
|
|
self._plaintext_segment_hasher = None
|
2008-01-17 08:11:35 +00:00
|
|
|
self.log("closed hash [%d]: %dB" %
|
|
|
|
(len(self._plaintext_segment_hashes)-1,
|
|
|
|
self._plaintext_segment_hashed_bytes),
|
|
|
|
level=log.NOISY)
|
|
|
|
self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
|
|
|
|
segnum=len(self._plaintext_segment_hashes)-1,
|
|
|
|
hash=idlib.b2a(p.digest()),
|
|
|
|
level=log.NOISY)
|
2007-07-24 02:31:53 +00:00
|
|
|
|
|
|
|
offset += this_segment
|
|
|
|
|
2008-01-25 04:51:34 +00:00
|
|
|
|
2008-01-25 00:25:33 +00:00
|
|
|
def read_encrypted(self, length, hash_only):
|
2008-01-16 10:03:35 +00:00
|
|
|
# make sure our parameters have been set up first
|
|
|
|
d = self.get_all_encoding_parameters()
|
|
|
|
d.addCallback(lambda ignored: self._get_encryptor())
|
2008-01-25 04:51:34 +00:00
|
|
|
# then fetch and encrypt the plaintext. The unusual structure here
|
|
|
|
# (passing a Deferred *into* a function) is needed to avoid
|
|
|
|
# overflowing the stack: Deferreds don't optimize out tail recursion.
|
|
|
|
# We also pass in a list, to which _read_encrypted will append
|
|
|
|
# ciphertext.
|
2008-01-25 00:25:33 +00:00
|
|
|
ciphertext = []
|
2008-01-25 04:51:34 +00:00
|
|
|
d2 = defer.Deferred()
|
|
|
|
d.addCallback(lambda ignored:
|
|
|
|
self._read_encrypted(length, ciphertext, hash_only, d2))
|
|
|
|
d.addCallback(lambda ignored: d2)
|
|
|
|
return d
|
|
|
|
|
|
|
|
def _read_encrypted(self, remaining, ciphertext, hash_only, fire_when_done):
|
|
|
|
if not remaining:
|
|
|
|
fire_when_done.callback(ciphertext)
|
|
|
|
return None
|
|
|
|
# tolerate large length= values without consuming a lot of RAM by
|
|
|
|
# reading just a chunk (say 50kB) at a time. This only really matters
|
|
|
|
# when hash_only==True (i.e. resuming an interrupted upload), since
|
|
|
|
# that's the case where we will be skipping over a lot of data.
|
|
|
|
size = min(remaining, self.CHUNKSIZE)
|
|
|
|
remaining = remaining - size
|
|
|
|
# read a chunk of plaintext..
|
|
|
|
d = defer.maybeDeferred(self.original.read, size)
|
|
|
|
# N.B.: if read() is synchronous, then since everything else is
|
|
|
|
# actually synchronous too, we'd blow the stack unless we stall for a
|
|
|
|
# tick. Once you accept a Deferred from IUploadable.read(), you must
|
|
|
|
# be prepared to have it fire immediately too.
|
|
|
|
d.addCallback(eventual.fireEventually)
|
|
|
|
def _good(plaintext):
|
2008-01-25 00:25:33 +00:00
|
|
|
# and encrypt it..
|
|
|
|
# o/' over the fields we go, hashing all the way, sHA! sHA! sHA! o/'
|
2008-01-25 04:51:34 +00:00
|
|
|
ct = self._hash_and_encrypt_plaintext(plaintext, hash_only)
|
|
|
|
ciphertext.extend(ct)
|
|
|
|
self._read_encrypted(remaining, ciphertext, hash_only,
|
|
|
|
fire_when_done)
|
|
|
|
def _err(why):
|
|
|
|
fire_when_done.errback(why)
|
|
|
|
d.addCallback(_good)
|
|
|
|
d.addErrback(_err)
|
|
|
|
return None
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2008-01-25 00:25:33 +00:00
|
|
|
def _hash_and_encrypt_plaintext(self, data, hash_only):
|
|
|
|
assert isinstance(data, (tuple, list)), type(data)
|
|
|
|
data = list(data)
|
|
|
|
cryptdata = []
|
|
|
|
# we use data.pop(0) instead of 'for chunk in data' to save
|
|
|
|
# memory: each chunk is destroyed as soon as we're done with it.
|
|
|
|
while data:
|
|
|
|
chunk = data.pop(0)
|
|
|
|
log.msg(" read_encrypted handling %dB-sized chunk" % len(chunk),
|
|
|
|
level=log.NOISY)
|
|
|
|
self._plaintext_hasher.update(chunk)
|
|
|
|
self._update_segment_hash(chunk)
|
|
|
|
# TODO: we have to encrypt the data (even if hash_only==True)
|
|
|
|
# because pycryptopp's AES-CTR implementation doesn't offer a
|
|
|
|
# way to change the counter value. Once pycryptopp acquires
|
|
|
|
# this ability, change this to simply update the counter
|
|
|
|
# before each call to (hash_only==False) _encryptor.process()
|
|
|
|
ciphertext = self._encryptor.process(chunk)
|
2008-01-29 01:38:38 +00:00
|
|
|
if hash_only:
|
2008-01-25 00:25:33 +00:00
|
|
|
log.msg(" skipping encryption")
|
2008-01-29 01:38:38 +00:00
|
|
|
else:
|
2008-01-25 00:25:33 +00:00
|
|
|
cryptdata.append(ciphertext)
|
|
|
|
del ciphertext
|
|
|
|
del chunk
|
|
|
|
return cryptdata
|
|
|
|
|
2008-01-25 04:51:34 +00:00
|
|
|
|
2008-01-10 00:58:47 +00:00
|
|
|
def get_plaintext_hashtree_leaves(self, first, last, num_segments):
|
2007-07-24 02:31:53 +00:00
|
|
|
if len(self._plaintext_segment_hashes) < num_segments:
|
|
|
|
# close out the last one
|
|
|
|
assert len(self._plaintext_segment_hashes) == num_segments-1
|
|
|
|
p, segment_left = self._get_segment_hasher()
|
|
|
|
self._plaintext_segment_hashes.append(p.digest())
|
|
|
|
del self._plaintext_segment_hasher
|
2008-01-17 08:11:35 +00:00
|
|
|
self.log("closing plaintext leaf hasher, hashed %d bytes" %
|
|
|
|
self._plaintext_segment_hashed_bytes,
|
|
|
|
level=log.NOISY)
|
|
|
|
self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
|
|
|
|
segnum=len(self._plaintext_segment_hashes)-1,
|
|
|
|
hash=idlib.b2a(p.digest()),
|
|
|
|
level=log.NOISY)
|
2007-07-24 02:31:53 +00:00
|
|
|
assert len(self._plaintext_segment_hashes) == num_segments
|
2008-01-10 00:58:47 +00:00
|
|
|
return defer.succeed(tuple(self._plaintext_segment_hashes[first:last]))
|
2007-07-24 02:31:53 +00:00
|
|
|
|
|
|
|
def get_plaintext_hash(self):
|
|
|
|
h = self._plaintext_hasher.digest()
|
|
|
|
return defer.succeed(h)
|
|
|
|
|
2008-01-17 08:52:33 +00:00
|
|
|
def close(self):
|
|
|
|
return self.original.close()
|
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
class CHKUploader:
|
2007-09-16 08:25:03 +00:00
|
|
|
peer_selector_class = Tahoe2PeerSelector
|
2007-07-20 01:21:44 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
def __init__(self, client, default_encoding_parameters):
|
2007-07-20 01:21:44 +00:00
|
|
|
self._client = client
|
2008-01-16 10:03:35 +00:00
|
|
|
assert isinstance(default_encoding_parameters, dict)
|
|
|
|
self._default_encoding_parameters = default_encoding_parameters
|
2007-11-20 02:33:41 +00:00
|
|
|
self._log_number = self._client.log("CHKUploader starting")
|
2008-01-15 04:22:55 +00:00
|
|
|
self._encoder = None
|
2007-07-20 01:21:44 +00:00
|
|
|
|
2008-01-15 04:19:20 +00:00
|
|
|
def log(self, *args, **kwargs):
|
|
|
|
if "parent" not in kwargs:
|
|
|
|
kwargs["parent"] = self._log_number
|
|
|
|
if "facility" not in kwargs:
|
|
|
|
kwargs["facility"] = "tahoe.upload"
|
|
|
|
return self._client.log(*args, **kwargs)
|
2007-11-20 02:33:41 +00:00
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
def start(self, uploadable):
|
2007-07-20 01:21:44 +00:00
|
|
|
"""Start uploading the file.
|
|
|
|
|
|
|
|
This method returns a Deferred that will fire with the URI (a
|
|
|
|
string)."""
|
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
uploadable = IUploadable(uploadable)
|
2007-11-20 02:33:41 +00:00
|
|
|
self.log("starting upload of %s" % uploadable)
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
eu = EncryptAnUploadable(uploadable, self._default_encoding_parameters)
|
2007-07-24 02:31:53 +00:00
|
|
|
d = self.start_encrypted(eu)
|
|
|
|
def _uploaded(res):
|
|
|
|
d1 = uploadable.get_encryption_key()
|
|
|
|
d1.addCallback(lambda key: self._compute_uri(res, key))
|
|
|
|
return d1
|
|
|
|
d.addCallback(_uploaded)
|
|
|
|
return d
|
2007-07-20 01:21:44 +00:00
|
|
|
|
2008-01-15 04:22:55 +00:00
|
|
|
def abort(self):
|
|
|
|
"""Call this is the upload must be abandoned before it completes.
|
|
|
|
This will tell the shareholders to delete their partial shares. I
|
|
|
|
return a Deferred that fires when these messages have been acked."""
|
|
|
|
if not self._encoder:
|
|
|
|
# how did you call abort() before calling start() ?
|
|
|
|
return defer.succeed(None)
|
|
|
|
return self._encoder.abort()
|
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
def start_encrypted(self, encrypted):
|
|
|
|
eu = IEncryptedUploadable(encrypted)
|
|
|
|
|
2008-01-17 08:11:35 +00:00
|
|
|
self._encoder = e = encode.Encoder(self._log_number)
|
2007-07-24 02:31:53 +00:00
|
|
|
d = e.set_encrypted_uploadable(eu)
|
2007-07-20 01:21:44 +00:00
|
|
|
d.addCallback(self.locate_all_shareholders)
|
2007-07-24 02:31:53 +00:00
|
|
|
d.addCallback(self.set_shareholders, e)
|
|
|
|
d.addCallback(lambda res: e.start())
|
|
|
|
# this fires with the uri_extension_hash and other data
|
2007-07-20 01:21:44 +00:00
|
|
|
return d
|
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
def locate_all_shareholders(self, encoder):
|
|
|
|
storage_index = encoder.get_param("storage_index")
|
2007-09-16 08:24:07 +00:00
|
|
|
upload_id = idlib.b2a(storage_index)[:6]
|
2007-11-20 02:33:41 +00:00
|
|
|
self.log("using storage index %s" % upload_id)
|
2008-01-15 04:19:20 +00:00
|
|
|
peer_selector = self.peer_selector_class(upload_id, self._log_number)
|
2007-09-16 08:24:07 +00:00
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
share_size = encoder.get_param("share_size")
|
|
|
|
block_size = encoder.get_param("block_size")
|
|
|
|
num_segments = encoder.get_param("num_segments")
|
|
|
|
k,desired,n = encoder.get_param("share_counts")
|
2008-01-16 10:03:35 +00:00
|
|
|
push_to_ourselves = self._client.get_push_to_ourselves()
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
gs = peer_selector.get_shareholders
|
2007-07-24 02:31:53 +00:00
|
|
|
d = gs(self._client, storage_index, share_size, block_size,
|
2007-08-10 01:30:24 +00:00
|
|
|
num_segments, n, desired, push_to_ourselves)
|
2007-07-20 01:21:44 +00:00
|
|
|
return d
|
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
def set_shareholders(self, used_peers, encoder):
|
2007-03-30 21:54:33 +00:00
|
|
|
"""
|
|
|
|
@param used_peers: a sequence of PeerTracker objects
|
|
|
|
"""
|
2007-11-20 02:33:41 +00:00
|
|
|
self.log("_send_shares, used_peers is %s" % (used_peers,))
|
2007-03-30 21:54:33 +00:00
|
|
|
for peer in used_peers:
|
|
|
|
assert isinstance(peer, PeerTracker)
|
2007-03-30 03:19:52 +00:00
|
|
|
buckets = {}
|
|
|
|
for peer in used_peers:
|
|
|
|
buckets.update(peer.buckets)
|
|
|
|
assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
|
2007-07-24 02:31:53 +00:00
|
|
|
encoder.set_shareholders(buckets)
|
2007-06-02 01:48:01 +00:00
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
def _compute_uri(self, (uri_extension_hash,
|
|
|
|
needed_shares, total_shares, size),
|
|
|
|
key):
|
|
|
|
u = uri.CHKFileURI(key=key,
|
2007-07-21 22:40:36 +00:00
|
|
|
uri_extension_hash=uri_extension_hash,
|
2007-07-24 02:31:53 +00:00
|
|
|
needed_shares=needed_shares,
|
|
|
|
total_shares=total_shares,
|
|
|
|
size=size,
|
2007-07-21 22:40:36 +00:00
|
|
|
)
|
|
|
|
return u.to_string()
|
2006-12-01 09:54:28 +00:00
|
|
|
|
2008-01-10 03:25:50 +00:00
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
def read_this_many_bytes(uploadable, size, prepend_data=[]):
|
2007-07-20 05:53:29 +00:00
|
|
|
if size == 0:
|
|
|
|
return defer.succeed([])
|
2007-07-20 01:21:44 +00:00
|
|
|
d = uploadable.read(size)
|
|
|
|
def _got(data):
|
2007-07-20 05:53:29 +00:00
|
|
|
assert isinstance(data, list)
|
2007-07-20 01:21:44 +00:00
|
|
|
bytes = sum([len(piece) for piece in data])
|
|
|
|
assert bytes > 0
|
|
|
|
assert bytes <= size
|
|
|
|
remaining = size - bytes
|
|
|
|
if remaining:
|
|
|
|
return read_this_many_bytes(uploadable, remaining,
|
|
|
|
prepend_data + data)
|
|
|
|
return prepend_data + data
|
|
|
|
d.addCallback(_got)
|
|
|
|
return d
|
|
|
|
|
2007-07-12 20:22:36 +00:00
|
|
|
class LiteralUploader:
|
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
def __init__(self, client):
|
2007-07-12 20:22:36 +00:00
|
|
|
self._client = client
|
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
def set_params(self, encoding_parameters):
|
|
|
|
pass
|
2007-07-12 20:22:36 +00:00
|
|
|
|
2007-07-24 02:31:53 +00:00
|
|
|
def start(self, uploadable):
|
|
|
|
uploadable = IUploadable(uploadable)
|
|
|
|
d = uploadable.get_size()
|
|
|
|
d.addCallback(lambda size: read_this_many_bytes(uploadable, size))
|
2007-07-21 22:40:36 +00:00
|
|
|
d.addCallback(lambda data: uri.LiteralFileURI("".join(data)))
|
|
|
|
d.addCallback(lambda u: u.to_string())
|
2007-07-20 01:21:44 +00:00
|
|
|
return d
|
2007-07-12 20:22:36 +00:00
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
def close(self):
|
|
|
|
pass
|
2007-01-16 04:22:22 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
class RemoteEncryptedUploadable(Referenceable):
|
2008-01-10 00:58:47 +00:00
|
|
|
implements(RIEncryptedUploadable)
|
|
|
|
|
|
|
|
def __init__(self, encrypted_uploadable):
|
|
|
|
self._eu = IEncryptedUploadable(encrypted_uploadable)
|
|
|
|
self._offset = 0
|
2008-01-17 08:16:56 +00:00
|
|
|
self._bytes_sent = 0
|
2008-01-15 04:22:55 +00:00
|
|
|
self._cutoff = None # set by debug options
|
|
|
|
self._cutoff_cb = None
|
2008-01-10 00:58:47 +00:00
|
|
|
|
|
|
|
def remote_get_size(self):
|
|
|
|
return self._eu.get_size()
|
2008-01-16 10:03:35 +00:00
|
|
|
def remote_get_all_encoding_parameters(self):
|
|
|
|
return self._eu.get_all_encoding_parameters()
|
|
|
|
|
2008-01-25 00:25:33 +00:00
|
|
|
def _read_encrypted(self, length, hash_only):
|
|
|
|
d = self._eu.read_encrypted(length, hash_only)
|
|
|
|
def _read(strings):
|
|
|
|
if hash_only:
|
|
|
|
self._offset += length
|
|
|
|
else:
|
|
|
|
size = sum([len(data) for data in strings])
|
|
|
|
self._offset += size
|
|
|
|
return strings
|
|
|
|
d.addCallback(_read)
|
|
|
|
return d
|
|
|
|
|
2008-01-10 00:58:47 +00:00
|
|
|
def remote_read_encrypted(self, offset, length):
|
2008-01-17 08:16:56 +00:00
|
|
|
# we don't support seek backwards, but we allow skipping forwards
|
|
|
|
precondition(offset >= 0, offset)
|
|
|
|
precondition(length >= 0, length)
|
|
|
|
lp = log.msg("remote_read_encrypted(%d-%d)" % (offset, offset+length),
|
|
|
|
level=log.NOISY)
|
|
|
|
precondition(offset >= self._offset, offset, self._offset)
|
|
|
|
if offset > self._offset:
|
|
|
|
# read the data from disk anyways, to build up the hash tree
|
|
|
|
skip = offset - self._offset
|
2008-01-29 02:13:36 +00:00
|
|
|
log.msg("remote_read_encrypted skipping ahead from %d to %d, skip=%d" %
|
|
|
|
(self._offset, offset, skip), level=log.UNUSUAL, parent=lp)
|
2008-01-25 00:25:33 +00:00
|
|
|
d = self._read_encrypted(skip, hash_only=True)
|
|
|
|
else:
|
|
|
|
d = defer.succeed(None)
|
|
|
|
|
|
|
|
def _at_correct_offset(res):
|
|
|
|
assert offset == self._offset, "%d != %d" % (offset, self._offset)
|
|
|
|
if self._cutoff is not None and offset+length > self._cutoff:
|
|
|
|
self._cutoff_cb()
|
|
|
|
|
|
|
|
return self._read_encrypted(length, hash_only=False)
|
|
|
|
d.addCallback(_at_correct_offset)
|
2008-01-17 08:16:56 +00:00
|
|
|
|
2008-01-11 12:42:55 +00:00
|
|
|
def _read(strings):
|
2008-01-15 04:22:55 +00:00
|
|
|
size = sum([len(data) for data in strings])
|
2008-01-17 08:16:56 +00:00
|
|
|
self._bytes_sent += size
|
2008-01-11 12:42:55 +00:00
|
|
|
return strings
|
2008-01-10 00:58:47 +00:00
|
|
|
d.addCallback(_read)
|
|
|
|
return d
|
2008-01-25 00:25:33 +00:00
|
|
|
|
2008-01-11 12:42:55 +00:00
|
|
|
def remote_get_plaintext_hashtree_leaves(self, first, last, num_segments):
|
2008-01-17 08:11:35 +00:00
|
|
|
log.msg("remote_get_plaintext_hashtree_leaves: %d-%d of %d" %
|
|
|
|
(first, last-1, num_segments),
|
|
|
|
level=log.NOISY)
|
2008-01-11 12:42:55 +00:00
|
|
|
d = self._eu.get_plaintext_hashtree_leaves(first, last, num_segments)
|
|
|
|
d.addCallback(list)
|
|
|
|
return d
|
2008-01-10 00:58:47 +00:00
|
|
|
def remote_get_plaintext_hash(self):
|
|
|
|
return self._eu.get_plaintext_hash()
|
2008-01-17 08:52:33 +00:00
|
|
|
def remote_close(self):
|
|
|
|
return self._eu.close()
|
2008-01-10 00:58:47 +00:00
|
|
|
|
|
|
|
|
|
|
|
class AssistedUploader:
|
2008-01-09 04:18:54 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
def __init__(self, helper, default_encoding_parameters):
|
2008-01-09 04:18:54 +00:00
|
|
|
self._helper = helper
|
2008-01-16 10:03:35 +00:00
|
|
|
assert isinstance(default_encoding_parameters, dict)
|
|
|
|
self._default_encoding_parameters = default_encoding_parameters
|
2008-01-10 03:25:50 +00:00
|
|
|
self._log_number = log.msg("AssistedUploader starting")
|
|
|
|
|
|
|
|
def log(self, msg, parent=None, **kwargs):
|
|
|
|
if parent is None:
|
|
|
|
parent = self._log_number
|
|
|
|
return log.msg(msg, parent=parent, **kwargs)
|
2008-01-09 04:18:54 +00:00
|
|
|
|
|
|
|
def start(self, uploadable):
|
2008-01-10 00:58:47 +00:00
|
|
|
u = IUploadable(uploadable)
|
2008-01-16 10:03:35 +00:00
|
|
|
eu = EncryptAnUploadable(u, self._default_encoding_parameters)
|
2008-01-09 04:18:54 +00:00
|
|
|
self._encuploadable = eu
|
|
|
|
d = eu.get_size()
|
|
|
|
d.addCallback(self._got_size)
|
2008-01-16 10:03:35 +00:00
|
|
|
d.addCallback(lambda res: eu.get_all_encoding_parameters())
|
|
|
|
d.addCallback(self._got_all_encoding_parameters)
|
2008-01-09 04:18:54 +00:00
|
|
|
# when we get the encryption key, that will also compute the storage
|
|
|
|
# index, so this only takes one pass.
|
|
|
|
# TODO: I'm not sure it's cool to switch back and forth between
|
|
|
|
# the Uploadable and the IEncryptedUploadable that wraps it.
|
|
|
|
d.addCallback(lambda res: u.get_encryption_key())
|
|
|
|
d.addCallback(self._got_encryption_key)
|
|
|
|
d.addCallback(lambda res: eu.get_storage_index())
|
|
|
|
d.addCallback(self._got_storage_index)
|
|
|
|
d.addCallback(self._contact_helper)
|
|
|
|
d.addCallback(self._build_readcap)
|
|
|
|
return d
|
|
|
|
|
|
|
|
def _got_size(self, size):
|
|
|
|
self._size = size
|
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
def _got_all_encoding_parameters(self, params):
|
|
|
|
k, happy, n, segment_size = params
|
|
|
|
# stash these for URI generation later
|
|
|
|
self._needed_shares = k
|
|
|
|
self._total_shares = n
|
|
|
|
|
2008-01-09 04:18:54 +00:00
|
|
|
def _got_encryption_key(self, key):
|
|
|
|
self._key = key
|
|
|
|
|
|
|
|
def _got_storage_index(self, storage_index):
|
|
|
|
self._storage_index = storage_index
|
|
|
|
|
|
|
|
def _contact_helper(self, res):
|
2008-01-11 11:53:37 +00:00
|
|
|
self.log("contacting helper..")
|
|
|
|
d = self._helper.callRemote("upload_chk", self._storage_index)
|
2008-01-09 04:18:54 +00:00
|
|
|
d.addCallback(self._contacted_helper)
|
|
|
|
return d
|
2008-01-10 03:25:50 +00:00
|
|
|
def _contacted_helper(self, (upload_results, upload_helper)):
|
2008-01-09 04:18:54 +00:00
|
|
|
if upload_helper:
|
2008-01-11 11:53:37 +00:00
|
|
|
self.log("helper says we need to upload")
|
2008-01-09 04:18:54 +00:00
|
|
|
# we need to upload the file
|
2008-01-16 10:03:35 +00:00
|
|
|
reu = RemoteEncryptedUploadable(self._encuploadable)
|
2008-01-17 08:16:56 +00:00
|
|
|
|
|
|
|
# we have unit tests which want to interrupt the upload so they
|
|
|
|
# can exercise resumability. They indicate this by adding debug_
|
|
|
|
# attributes to the Uploadable.
|
|
|
|
if hasattr(self._encuploadable.original,
|
|
|
|
"debug_stash_RemoteEncryptedUploadable"):
|
|
|
|
# we communicate back to them the same way. This may look
|
|
|
|
# weird, but, well, ok, it is. However, it is better than the
|
|
|
|
# barrage of options={} dictionaries that were flying around
|
|
|
|
# before. We could also do this by setting attributes on the
|
|
|
|
# class, but that doesn't make it easy to undo when we're
|
|
|
|
# done. TODO: find a cleaner way, maybe just a small options=
|
|
|
|
# dict somewhere.
|
|
|
|
self._encuploadable.original.debug_RemoteEncryptedUploadable = reu
|
|
|
|
if hasattr(self._encuploadable.original, "debug_interrupt"):
|
|
|
|
reu._cutoff = self._encuploadable.original.debug_interrupt
|
2008-01-15 04:22:55 +00:00
|
|
|
def _cutoff():
|
|
|
|
# simulate the loss of the connection to the helper
|
|
|
|
self.log("debug_interrupt killing connection to helper",
|
|
|
|
level=log.WEIRD)
|
|
|
|
upload_helper.tracker.broker.transport.loseConnection()
|
|
|
|
return
|
|
|
|
reu._cutoff_cb = _cutoff
|
2008-01-09 04:18:54 +00:00
|
|
|
d = upload_helper.callRemote("upload", reu)
|
|
|
|
# this Deferred will fire with the upload results
|
|
|
|
return d
|
2008-01-11 11:53:37 +00:00
|
|
|
self.log("helper says file is already uploaded")
|
2008-01-09 04:18:54 +00:00
|
|
|
return upload_results
|
|
|
|
|
|
|
|
def _build_readcap(self, upload_results):
|
2008-01-30 00:38:12 +00:00
|
|
|
self.log("upload finished, building readcap")
|
2008-01-09 04:18:54 +00:00
|
|
|
ur = upload_results
|
|
|
|
u = uri.CHKFileURI(key=self._key,
|
|
|
|
uri_extension_hash=ur['uri_extension_hash'],
|
|
|
|
needed_shares=self._needed_shares,
|
|
|
|
total_shares=self._total_shares,
|
|
|
|
size=self._size,
|
|
|
|
)
|
|
|
|
return u.to_string()
|
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
class NoParameterPreferencesMixin:
|
2008-01-17 08:17:42 +00:00
|
|
|
max_segment_size = None
|
|
|
|
encoding_parameters = None
|
2008-01-16 10:03:35 +00:00
|
|
|
def get_maximum_segment_size(self):
|
2008-01-17 08:17:42 +00:00
|
|
|
return defer.succeed(self.max_segment_size)
|
2008-01-16 10:03:35 +00:00
|
|
|
def get_encoding_parameters(self):
|
2008-01-17 08:17:42 +00:00
|
|
|
return defer.succeed(self.encoding_parameters)
|
2006-12-04 02:07:41 +00:00
|
|
|
|
2008-01-30 19:24:50 +00:00
|
|
|
class FileHandle(NoParameterPreferencesMixin):
|
|
|
|
implements(IUploadable)
|
2007-07-24 02:31:53 +00:00
|
|
|
|
2008-01-30 19:24:50 +00:00
|
|
|
def __init__(self, filehandle, contenthashkey=True):
|
|
|
|
self._filehandle = filehandle
|
|
|
|
self._key = None
|
|
|
|
self._contenthashkey = contenthashkey
|
|
|
|
|
|
|
|
def _get_encryption_key_content_hash(self):
|
2007-07-24 02:31:53 +00:00
|
|
|
if self._key is None:
|
|
|
|
f = self._filehandle
|
2007-08-28 02:00:18 +00:00
|
|
|
enckey_hasher = key_hasher()
|
2007-07-24 02:31:53 +00:00
|
|
|
#enckey_hasher.update(encoding_parameters) # TODO
|
|
|
|
f.seek(0)
|
|
|
|
BLOCKSIZE = 64*1024
|
|
|
|
while True:
|
|
|
|
data = f.read(BLOCKSIZE)
|
|
|
|
if not data:
|
|
|
|
break
|
|
|
|
enckey_hasher.update(data)
|
|
|
|
f.seek(0)
|
|
|
|
self._key = enckey_hasher.digest()[:16]
|
|
|
|
|
|
|
|
return defer.succeed(self._key)
|
2007-07-20 01:21:44 +00:00
|
|
|
|
2008-01-30 19:24:50 +00:00
|
|
|
def _get_encryption_key_random(self):
|
2007-07-24 02:31:53 +00:00
|
|
|
if self._key is None:
|
|
|
|
self._key = os.urandom(16)
|
|
|
|
return defer.succeed(self._key)
|
2007-07-20 01:21:44 +00:00
|
|
|
|
2008-01-30 19:24:50 +00:00
|
|
|
def get_encryption_key(self):
|
|
|
|
if self._contenthashkey:
|
|
|
|
return self._get_encryption_key_content_hash()
|
|
|
|
else:
|
|
|
|
return self._get_encryption_key_random()
|
2007-07-20 01:21:44 +00:00
|
|
|
|
|
|
|
def get_size(self):
|
|
|
|
self._filehandle.seek(0,2)
|
|
|
|
size = self._filehandle.tell()
|
|
|
|
self._filehandle.seek(0)
|
|
|
|
return defer.succeed(size)
|
|
|
|
|
|
|
|
def read(self, length):
|
|
|
|
return defer.succeed([self._filehandle.read(length)])
|
|
|
|
|
|
|
|
def close(self):
|
2006-12-04 02:07:41 +00:00
|
|
|
# the originator of the filehandle reserves the right to close it
|
|
|
|
pass
|
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
class FileName(FileHandle):
|
2008-01-30 19:24:50 +00:00
|
|
|
def __init__(self, filename, contenthashkey=True):
|
|
|
|
FileHandle.__init__(self, open(filename, "rb"), contenthashkey=contenthashkey)
|
2007-07-20 01:21:44 +00:00
|
|
|
def close(self):
|
|
|
|
FileHandle.close(self)
|
|
|
|
self._filehandle.close()
|
|
|
|
|
|
|
|
class Data(FileHandle):
|
2008-01-31 02:02:56 +00:00
|
|
|
def __init__(self, data, contenthashkey=True):
|
2008-01-30 19:24:50 +00:00
|
|
|
FileHandle.__init__(self, StringIO(data), contenthashkey=contenthashkey)
|
2007-07-20 01:21:44 +00:00
|
|
|
|
2006-12-03 01:27:18 +00:00
|
|
|
class Uploader(service.MultiService):
|
|
|
|
"""I am a service that allows file uploading.
|
|
|
|
"""
|
2007-01-21 22:01:34 +00:00
|
|
|
implements(IUploader)
|
2006-12-03 01:27:18 +00:00
|
|
|
name = "uploader"
|
2007-07-20 01:21:44 +00:00
|
|
|
uploader_class = CHKUploader
|
2007-07-12 20:22:36 +00:00
|
|
|
URI_LIT_SIZE_THRESHOLD = 55
|
2006-12-03 01:27:18 +00:00
|
|
|
|
2008-01-09 04:18:54 +00:00
|
|
|
def __init__(self, helper_furl=None):
|
|
|
|
self._helper_furl = helper_furl
|
|
|
|
self._helper = None
|
2008-01-10 00:58:47 +00:00
|
|
|
service.MultiService.__init__(self)
|
2008-01-09 04:18:54 +00:00
|
|
|
|
|
|
|
def startService(self):
|
|
|
|
service.MultiService.startService(self)
|
|
|
|
if self._helper_furl:
|
|
|
|
self.parent.tub.connectTo(self._helper_furl,
|
|
|
|
self._got_helper)
|
|
|
|
|
|
|
|
def _got_helper(self, helper):
|
|
|
|
self._helper = helper
|
|
|
|
|
2008-01-28 20:56:22 +00:00
|
|
|
def get_helper_info(self):
|
|
|
|
# return a tuple of (helper_furl_or_None, connected_bool)
|
|
|
|
return (self._helper_furl, bool(self._helper))
|
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
def upload(self, uploadable):
|
2007-01-19 09:23:03 +00:00
|
|
|
# this returns the URI
|
2006-12-03 01:27:18 +00:00
|
|
|
assert self.parent
|
|
|
|
assert self.running
|
2007-08-10 01:30:24 +00:00
|
|
|
|
2007-07-20 01:21:44 +00:00
|
|
|
uploadable = IUploadable(uploadable)
|
|
|
|
d = uploadable.get_size()
|
|
|
|
def _got_size(size):
|
2008-01-16 10:03:35 +00:00
|
|
|
default_params = self.parent.get_encoding_parameters()
|
|
|
|
precondition(isinstance(default_params, dict), default_params)
|
|
|
|
precondition("max_segment_size" in default_params, default_params)
|
2007-07-20 01:21:44 +00:00
|
|
|
if size <= self.URI_LIT_SIZE_THRESHOLD:
|
2008-01-16 10:03:35 +00:00
|
|
|
uploader = LiteralUploader(self.parent)
|
2008-01-09 04:18:54 +00:00
|
|
|
elif self._helper:
|
2008-01-16 10:03:35 +00:00
|
|
|
uploader = AssistedUploader(self._helper, default_params)
|
2008-01-09 04:18:54 +00:00
|
|
|
else:
|
2008-01-16 10:03:35 +00:00
|
|
|
uploader = self.uploader_class(self.parent, default_params)
|
2007-07-24 02:31:53 +00:00
|
|
|
return uploader.start(uploadable)
|
2007-07-20 01:21:44 +00:00
|
|
|
d.addCallback(_got_size)
|
2006-12-04 02:07:41 +00:00
|
|
|
def _done(res):
|
2007-07-20 01:21:44 +00:00
|
|
|
uploadable.close()
|
2006-12-04 02:07:41 +00:00
|
|
|
return res
|
|
|
|
d.addBoth(_done)
|
2006-12-03 01:27:18 +00:00
|
|
|
return d
|