2008-01-09 04:18:54 +00:00
|
|
|
|
2008-04-17 20:02:22 +00:00
|
|
|
import os, stat, time, weakref
|
2008-01-10 03:25:05 +00:00
|
|
|
from zope.interface import implements
|
2008-01-10 04:25:47 +00:00
|
|
|
from twisted.internet import defer
|
2009-05-22 00:38:23 +00:00
|
|
|
from foolscap.api import Referenceable, DeadReferenceError, eventually
|
versioning: include an "appname" in the application version string in the versioning protocol, and make that appname be controlled by setup.py
It is currently hardcoded in setup.py to be 'allmydata-tahoe'. Ticket #556 is to make it configurable by a runtime command-line argument to setup.py: "--appname=foo", but I suddenly wondered if we really wanted that and at the same time realized that we don't need that for tahoe-1.3.0 release, so this patch just hardcodes it in setup.py.
setup.py inspects a file named 'src/allmydata/_appname.py' and asserts that it contains the string "__appname__ = 'allmydata-tahoe'", and creates it if it isn't already present. src/allmydata/__init__.py imports _appname and reads __appname__ from it. The rest of the Python code imports allmydata and inspects "allmydata.__appname__", although actually every use relies on "allmydata.__full_version__" instead, where "allmydata.__full_version__" is created in src/allmydata/__init__.py to be:
__full_version__ = __appname__ + '-' + str(__version__).
All the code that emits an "application version string" when describing what version of a protocol it supports (introducer server, storage server, upload helper), or when describing itself in general (introducer client), uses allmydata.__full_version__.
This fixes ticket #556 at least well enough for tahoe-1.3.0 release.
2009-02-12 00:18:16 +00:00
|
|
|
import allmydata # for __full_version__
|
2009-02-18 21:46:55 +00:00
|
|
|
from allmydata import interfaces, uri
|
|
|
|
from allmydata.storage.server import si_b2a
|
2008-07-16 20:14:39 +00:00
|
|
|
from allmydata.immutable import upload
|
2008-10-10 00:08:00 +00:00
|
|
|
from allmydata.immutable.layout import ReadBucketProxy
|
2009-01-07 04:48:22 +00:00
|
|
|
from allmydata.util.assertutil import precondition
|
2009-02-09 21:45:43 +00:00
|
|
|
from allmydata.util import idlib, log, observer, fileutil, hashutil, dictutil
|
2008-01-09 04:18:54 +00:00
|
|
|
|
|
|
|
|
2008-01-15 05:20:03 +00:00
|
|
|
class NotEnoughWritersError(Exception):
    """Raised when no assisted uploader is left to answer a remote call."""
|
|
|
|
|
2008-01-09 04:18:54 +00:00
|
|
|
|
2008-01-31 01:49:02 +00:00
|
|
|
class CHKCheckerAndUEBFetcher:
    """I check to see if a file is already present in the grid. I also fetch
    the URI Extension Block, which is useful for an uploading client who
    wants to avoid the work of encryption and encoding.

    check() returns a Deferred. It fires with False if the file is not
    completely healthy: i.e. if fewer distinct shares were found than the
    UEB's 'total_shares', or if no UEB could be fetched at all.

    If the file is completely healthy, it fires with a tuple of (sharemap,
    UEB_data, UEB_hash).
    """

    def __init__(self, peer_getter, storage_index, logparent=None):
        # peer_getter is called with the storage index and must return an
        # iterable of server objects (see _get_all_shareholders)
        self._peer_getter = peer_getter
        self._storage_index = storage_index
        self._logparent = logparent
        # accumulated by _got_response as the servers answer
        self._found_shares = set()  # share numbers seen so far
        self._sharemap = dictutil.DictOfSets()  # shnum -> peerids
        self._readers = set()  # (bucket, peerid) pairs
        # filled in by _got_uri_extension
        self._ueb_hash = None
        self._ueb_data = None

    def log(self, *args, **kwargs):
        # supply our facility and log parent unless the caller overrides them
        kwargs.setdefault('facility', "tahoe.helper.chk.checkandUEBfetch")
        kwargs.setdefault('parent', self._logparent)
        return log.msg(*args, **kwargs)

    def check(self):
        """Locate shares, fetch the UEB from one of them, and report the
        verdict (see the class docstring) through the returned Deferred."""
        verdict = self._get_all_shareholders(self._storage_index)
        verdict.addCallback(self._get_uri_extension)
        verdict.addCallback(self._done)
        return verdict

    def _get_all_shareholders(self, storage_index):
        # send one get_buckets query to each server; successes go to
        # _got_response (with the server's id), failures to _got_error
        def _query(server):
            d = server.get_rref().callRemote("get_buckets", storage_index)
            d.addCallbacks(self._got_response, self._got_error,
                           callbackArgs=(server.get_serverid(),))
            return d
        return defer.DeferredList([_query(s)
                                   for s in self._peer_getter(storage_index)])

    def _got_response(self, buckets, peerid):
        # buckets is a dict: maps shnum to a remote reference for that share
        shnum_list = ",".join([str(n) for n in buckets])
        self.log("got_response: [%s] has %d shares (%s)" %
                 (idlib.shortnodeid_b2a(peerid), len(buckets), shnum_list),
                 level=log.NOISY)
        for shnum, bucket in buckets.items():
            self._found_shares.add(shnum)
            self._sharemap.add(shnum, peerid)
            # remember who can serve us the UEB later
            self._readers.add((bucket, peerid))

    def _got_error(self, f):
        # a server that went away is expected and not worth logging;
        # anything else is recorded, but never fails the overall check
        if not f.check(DeadReferenceError):
            log.err(f, parent=self._logparent)

    def _get_uri_extension(self, res):
        # assume that we can pull the UEB from any share. If we get an error,
        # declare the whole file unavailable.
        if not self._readers:
            self.log("no readers, so no UEB", level=log.NOISY)
            return None
        bucket, peerid = self._readers.pop()
        proxy = ReadBucketProxy(bucket, peerid, si_b2a(self._storage_index))
        d = proxy.get_uri_extension()
        d.addCallback(self._got_uri_extension)
        d.addErrback(self._ueb_error)
        return d

    def _got_uri_extension(self, ueb):
        self.log("_got_uri_extension", level=log.NOISY)
        # keep both the hash of the raw UEB and its unpacked form
        self._ueb_hash = hashutil.uri_extension_hash(ueb)
        self._ueb_data = uri.unpack_extension(ueb)

    def _ueb_error(self, f):
        # an error means the file is unavailable, but the overall check
        # shouldn't fail: log it and swallow the failure.
        self.log("UEB fetch failed", failure=f, level=log.WEIRD, umid="sJLKVg")
        return None

    def _done(self, res):
        if not self._ueb_data:
            # without a UEB we cannot tell how many shares there should be
            self.log("unable to find UEB data, file not found in grid",
                     level=log.NOISY)
            return False

        found = len(self._found_shares)
        total = self._ueb_data['total_shares']
        self.log(format="got %(found)d shares of %(total)d",
                 found=found, total=total, level=log.NOISY)
        if found < total:
            # not all shares are present in the grid
            self.log("not enough to qualify, file not found in grid",
                     level=log.NOISY)
            return False

        # all shares are present
        self.log("all shares present, file is found in grid",
                 level=log.NOISY)
        return (self._sharemap, self._ueb_data, self._ueb_hash)
|
|
|
|
|
|
|
|
|
2008-01-10 04:25:47 +00:00
|
|
|
class CHKUploadHelper(Referenceable, upload.CHKUploader):
    """I am the helper-server -side counterpart to AssistedUploader. I handle
    peer selection, encoding, and share pushing. I read ciphertext from the
    remote AssistedUploader.
    """
    implements(interfaces.RICHKUploadHelper)
    VERSION = {
        "http://allmydata.org/tahoe/protocols/helper/chk-upload/v1": {},
        "application-version": str(allmydata.__full_version__),
    }

    def __init__(self, storage_index,
                 helper, storage_broker, secret_holder,
                 incoming_file, encoding_file,
                 results, log_number):
        self._storage_index = storage_index
        self._helper = helper
        self._storage_broker = storage_broker
        self._secret_holder = secret_holder
        self._incoming_file = incoming_file
        self._encoding_file = encoding_file
        self._results = results
        self._log_number = log_number
        # short printable prefix of the storage index, used in log messages
        # (the fetcher created below copies it from us)
        self._upload_id = si_b2a(storage_index)[:5]

        status = upload.UploadStatus()
        status.set_helper(False)
        status.set_storage_index(storage_index)
        status.set_status("fetching ciphertext")
        status.set_progress(0, 1.0)
        self._upload_status = status

        self._helper.log("CHKUploadHelper starting for SI %s" % self._upload_id,
                         parent=log_number)

        # the fetcher pulls ciphertext onto disk; the reader then feeds that
        # file to the encoder
        self._fetcher = CHKCiphertextFetcher(self, incoming_file, encoding_file,
                                             self._log_number)
        self._reader = LocalCiphertextReader(self, storage_index, encoding_file)
        self._finished_observers = observer.OneShotObserverList()

        # once the ciphertext is fully on disk: open it, encode+push it, and
        # report the outcome to everyone waiting in remote_upload()
        def _open_reader(ignored):
            return self._reader.start()
        def _encode_and_push(ignored):
            return self.start_encrypted(self._reader)
        pipeline = self._fetcher.when_done()
        pipeline.addCallback(_open_reader)
        pipeline.addCallback(_encode_and_push)
        pipeline.addCallback(self._finished)
        pipeline.addErrback(self._failed)

    def log(self, *args, **kwargs):
        # default to our own facility, then defer to the uploader's logger
        kwargs.setdefault('facility', "tahoe.helper.chk")
        return upload.CHKUploader.log(self, *args, **kwargs)

    def start(self):
        """Record the start time, log what ciphertext (if any) is already on
        disk, and return the pair (results, self).

        The same pair is returned whether the encoding file, the incoming
        file, or neither is present; only the log message differs.
        """
        self._started = time.time()
        self.log("deciding whether to upload the file or not", level=log.NOISY)
        if os.path.exists(self._encoding_file):
            # we have the whole file, and we might be encoding it (or the
            # encode/upload might have failed, and we need to restart it).
            self.log("ciphertext already in place", level=log.UNUSUAL)
        elif os.path.exists(self._incoming_file):
            # we have some of the file, but not all of it (otherwise we'd be
            # encoding).
            self.log("partial ciphertext already present", level=log.UNUSUAL)
        else:
            # we don't remember uploading this file
            self.log("no ciphertext yet", level=log.NOISY)
        return (self._results, self)

    def remote_get_version(self):
        return self.VERSION

    def remote_upload(self, reader):
        # reader is an RIEncryptedUploadable. I am specified to return an
        # UploadResults dictionary.

        # let our fetcher pull ciphertext from the reader.
        self._fetcher.add_reader(reader)
        # and also hashes
        self._reader.add_reader(reader)

        # and inform the client when the upload has finished
        return self._finished_observers.when_fired()

    def _finished(self, uploadresults):
        precondition(isinstance(uploadresults.verifycapstr, str), uploadresults.verifycapstr)
        assert interfaces.IUploadResults.providedBy(uploadresults), uploadresults
        results = uploadresults
        verifycap = uri.from_string(results.verifycapstr)
        results.uri_extension_hash = verifycap.uri_extension_hash

        # fold the fetcher's statistics into the results
        fetch_times = self._fetcher.get_times()
        results.timings["cumulative_fetch"] = fetch_times["cumulative_fetch"]
        results.ciphertext_fetched = self._fetcher.get_ciphertext_fetched()
        results.timings["total_fetch"] = fetch_times["total"]

        # the file handle must be closed before the file can be unlinked
        # (this matters on windows)
        self._reader.close()
        os.unlink(self._encoding_file)
        self._finished_observers.fire(results)
        self._helper.upload_finished(self._storage_index, verifycap.size)
        del self._reader

    def _failed(self, f):
        self.log(format="CHKUploadHelper(%(si)s) failed",
                 si=si_b2a(self._storage_index)[:5],
                 failure=f,
                 level=log.UNUSUAL)
        # hand the failure to everyone waiting in remote_upload(), and report
        # a size of zero to the helper
        self._finished_observers.fire(f)
        self._helper.upload_finished(self._storage_index, 0)
        del self._reader
|
2008-01-10 03:25:05 +00:00
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
class AskUntilSuccessMixin:
    # The class mixing me in must provide a '_readers' list (remote readers,
    # tried in order) and an '_upload_helper' whose log() I use to report
    # failed calls.
    _last_failure = None

    def add_reader(self, reader):
        self._readers.append(reader)

    def call(self, *args, **kwargs):
        """Invoke callRemote(*args, **kwargs) on the first remaining reader.

        If that call fails, the reader is dropped and the same call is
        retried on the next one. Raises NotEnoughWritersError when no reader
        is left.
        """
        if not self._readers:
            raise NotEnoughWritersError("ran out of assisted uploaders, last failure was %s" % self._last_failure)
        current = self._readers[0]

        def _retry_with_next(f):
            self._last_failure = f
            # the reader may already have been dropped by another failed call
            try:
                self._readers.remove(current)
            except ValueError:
                pass
            self._upload_helper.log("call to assisted uploader %s failed" % current,
                                    failure=f, level=log.UNUSUAL)
            # we can try again with someone else who's left
            return self.call(*args, **kwargs)

        d = current.callRemote(*args, **kwargs)
        d.addErrback(_retry_with_next)
        return d
|
|
|
|
|
|
|
|
class CHKCiphertextFetcher(AskUntilSuccessMixin):
    """I use one or more remote RIEncryptedUploadable instances to gather
    ciphertext on disk. When I'm done, the file I create can be used by a
    LocalCiphertextReader to satisfy the ciphertext needs of a CHK upload
    process.

    I begin pulling ciphertext as soon as a reader is added. I remove readers
    when they have any sort of error. If the last reader is removed, I fire
    my when_done() Deferred with a failure.

    I fire my when_done() Deferred (with None) immediately after I have moved
    the ciphertext to 'encoded_file'.
    """

    def __init__(self, helper, incoming_file, encoded_file, logparent):
        self._upload_helper = helper
        self._incoming_file = incoming_file
        self._encoding_file = encoded_file
        self._upload_id = helper._upload_id
        self._log_parent = logparent
        self._done_observers = observer.OneShotObserverList()
        self._readers = []
        self._started = False
        # open handle on the incoming file while a fetch is in progress
        self._f = None
        self._times = {
            "cumulative_fetch": 0.0,
            "total": 0.0,
            }
        self._ciphertext_fetched = 0

    def log(self, *args, **kwargs):
        if "facility" not in kwargs:
            kwargs["facility"] = "tahoe.helper.chkupload.fetch"
        if "parent" not in kwargs:
            kwargs["parent"] = self._log_parent
        return log.msg(*args, **kwargs)

    def add_reader(self, reader):
        AskUntilSuccessMixin.add_reader(self, reader)
        # kick off the fetch on a later turn; _start() ignores repeat calls
        eventually(self._start)

    def _start(self):
        if self._started:
            return
        self._started = True
        started = time.time()

        if os.path.exists(self._encoding_file):
            self.log("ciphertext already present, bypassing fetch",
                     level=log.UNUSUAL)
            # we'll still need the plaintext hashes (when
            # LocalCiphertextReader.get_plaintext_hashtree_leaves() is
            # called), and currently the easiest way to get them is to ask
            # the sender for the last byte of ciphertext. That will provoke
            # them into reading and hashing (but not sending) everything
            # else.
            have = os.stat(self._encoding_file)[stat.ST_SIZE]
            d = self.call("read_encrypted", have-1, 1)
        else:
            # first, find out how large the file is going to be
            d = self.call("get_size")
            d.addCallback(self._got_size)
            d.addCallback(self._start_reading)
            d.addCallback(self._done)
        # Both paths share the completion and error handlers. Without the
        # errback, a failed remote call in the bypass path would never fire
        # when_done(), leaving the upload waiting forever.
        d.addCallback(self._done2, started)
        d.addErrback(self._failed)

    def _got_size(self, size):
        self.log("total size is %d bytes" % size, level=log.NOISY)
        self._upload_helper._upload_status.set_size(size)
        self._expected_size = size

    def _start_reading(self, res):
        # then find out how much crypttext we have on disk
        if os.path.exists(self._incoming_file):
            self._have = os.stat(self._incoming_file)[stat.ST_SIZE]
            self._upload_helper._helper.count("chk_upload_helper.resumes")
            self.log("we already have %d bytes" % self._have, level=log.NOISY)
        else:
            self._have = 0
            self.log("we do not have any ciphertext yet", level=log.NOISY)
        self.log("starting ciphertext fetch", level=log.NOISY)
        # append, so a partial file from an earlier attempt is extended
        self._f = open(self._incoming_file, "ab")

        # now loop to pull the data from the readers
        d = defer.Deferred()
        self._loop(d)
        # this Deferred will be fired once the last byte has been written to
        # self._f
        return d

    # read data in 50kB chunks. We should choose a more considered number
    # here, possibly letting the client specify it. The goal should be to
    # keep the RTT*bandwidth to be less than 10% of the chunk size, to reduce
    # the upload bandwidth lost because this protocol is non-windowing. Too
    # large, however, means more memory consumption for both ends. Something
    # that can be transferred in, say, 10 seconds sounds about right. On my
    # home DSL line (50kBps upstream), that suggests 500kB. Most lines are
    # slower, maybe 10kBps, which suggests 100kB, and that's a bit more
    # memory than I want to hang on to, so I'm going to go with 50kB and see
    # how that works.
    CHUNK_SIZE = 50*1024

    def _loop(self, fire_when_done):
        # this slightly weird structure is needed because Deferreds don't do
        # tail-recursion, so it is important to let each one retire promptly.
        # Simply chaining them will cause a stack overflow at the end of a
        # transfer that involves more than a few hundred chunks.
        # 'fire_when_done' lives a long time, but the Deferreds returned by
        # the inner _fetch() call do not.
        start = time.time()
        d = defer.maybeDeferred(self._fetch)
        def _done(finished):
            elapsed = time.time() - start
            self._times["cumulative_fetch"] += elapsed
            if finished:
                self.log("finished reading ciphertext", level=log.NOISY)
                fire_when_done.callback(None)
            else:
                self._loop(fire_when_done)
        def _err(f):
            self.log(format="[%(si)s] ciphertext read failed",
                     si=self._upload_id, failure=f, level=log.UNUSUAL)
            fire_when_done.errback(f)
        d.addCallbacks(_done, _err)
        return None

    def _fetch(self):
        # Returns True once nothing is left to fetch; otherwise returns a
        # Deferred that fires with False after one more chunk has been
        # written to disk.
        needed = self._expected_size - self._have
        fetch_size = min(needed, self.CHUNK_SIZE)
        if fetch_size == 0:
            self._upload_helper._upload_status.set_progress(1, 1.0)
            return True # all done
        # fraction (0.0-1.0) that will be on disk once this chunk arrives
        percent = 0.0
        if self._expected_size:
            percent = 1.0 * (self._have+fetch_size) / self._expected_size
        self.log(format="fetching [%(si)s] %(start)d-%(end)d of %(total)d (%(percent)d%%)",
                 si=self._upload_id,
                 start=self._have,
                 end=self._have+fetch_size,
                 total=self._expected_size,
                 percent=int(100.0*percent),
                 level=log.NOISY)
        d = self.call("read_encrypted", self._have, fetch_size)
        def _got_data(ciphertext_v):
            # ciphertext_v is a sequence of data strings
            for data in ciphertext_v:
                self._f.write(data)
                self._have += len(data)
                self._ciphertext_fetched += len(data)
                self._upload_helper._helper.count("chk_upload_helper.fetched_bytes", len(data))
                self._upload_helper._upload_status.set_progress(1, percent)
            return False # not done
        d.addCallback(_got_data)
        return d

    def _done(self, res):
        self._f.close()
        self._f = None
        self.log(format="done fetching ciphertext, size=%(size)d",
                 size=os.stat(self._incoming_file)[stat.ST_SIZE],
                 level=log.NOISY)
        # the complete file moves from the incoming to the encoding location
        os.rename(self._incoming_file, self._encoding_file)

    def _done2(self, _ignored, started):
        self.log("done2", level=log.NOISY)
        elapsed = time.time() - started
        self._times["total"] = elapsed
        self._readers = []
        self._done_observers.fire(None)

    def _failed(self, f):
        if self._f:
            self._f.close()
            # forget the closed handle, as _done() does
            self._f = None
        self._readers = []
        self._done_observers.fire(f)

    def when_done(self):
        return self._done_observers.when_fired()

    def get_times(self):
        return self._times

    def get_ciphertext_fetched(self):
        return self._ciphertext_fetched
|
|
|
|
|
2008-01-16 10:03:35 +00:00
|
|
|
|
|
|
|
class LocalCiphertextReader(AskUntilSuccessMixin):
    """I serve ciphertext from the local encoding file. The encoding
    parameters and the close message are forwarded to the remote readers
    through call().
    """
    implements(interfaces.IEncryptedUploadable)

    def __init__(self, upload_helper, storage_index, encoding_file):
        self._upload_helper = upload_helper
        self._storage_index = storage_index
        self._encoding_file = encoding_file
        # remote readers, added later through add_reader()
        self._readers = []
        self._status = None

    def start(self):
        """Mark the upload as "pushing", then record the size of the
        encoding file and open it for reading."""
        self._upload_helper._upload_status.set_status("pushing")
        path = self._encoding_file
        self._size = os.stat(path)[stat.ST_SIZE]
        self.f = open(path, "rb")

    def get_size(self):
        return defer.succeed(self._size)

    def get_all_encoding_parameters(self):
        return self.call("get_all_encoding_parameters")

    def get_storage_index(self):
        return defer.succeed(self._storage_index)

    def read_encrypted(self, length, hash_only):
        assert hash_only is False
        # the result is a one-element list holding the data read
        def _as_list(data):
            return [data]
        d = defer.maybeDeferred(self.f.read, length)
        d.addCallback(_as_list)
        return d

    def close(self):
        self.f.close()
        # ??. I'm not sure if it makes sense to forward the close message.
        return self.call("close")
|
|
|
|
|
|
|
|
|
2008-01-10 03:25:05 +00:00
|
|
|
|
2009-08-15 20:17:37 +00:00
|
|
|
class Helper(Referenceable):
|
2008-03-27 22:55:32 +00:00
|
|
|
implements(interfaces.RIHelper, interfaces.IStatsProducer)
|
2008-01-10 03:25:05 +00:00
|
|
|
# this is the non-distributed version. When we need to have multiple
|
2008-01-11 11:53:37 +00:00
|
|
|
# helpers, this object will become the HelperCoordinator, and will query
|
|
|
|
# the farm of Helpers to see if anyone has the storage_index of interest,
|
|
|
|
# and send the request off to them. If nobody has it, we'll choose a
|
|
|
|
# helper at random.
|
2008-01-10 03:25:05 +00:00
|
|
|
|
2008-01-15 04:24:26 +00:00
|
|
|
name = "helper"
|
2008-11-22 00:43:52 +00:00
|
|
|
VERSION = { "http://allmydata.org/tahoe/protocols/helper/v1" :
|
|
|
|
{ },
|
versioning: include an "appname" in the application version string in the versioning protocol, and make that appname be controlled by setup.py
It is currently hardcoded in setup.py to be 'allmydata-tahoe'. Ticket #556 is to make it configurable by a runtime command-line argument to setup.py: "--appname=foo", but I suddenly wondered if we really wanted that and at the same time realized that we don't need that for tahoe-1.3.0 release, so this patch just hardcodes it in setup.py.
setup.py inspects a file named 'src/allmydata/_appname.py' and asserts that it contains the string "__appname__ = 'allmydata-tahoe'", and creates it if it isn't already present. src/allmydata/__init__.py imports _appname and reads __appname__ from it. The rest of the Python code imports allmydata and inspects "allmydata.__appname__", although actually every use relies on "allmydata.__full_version__" instead, where "allmydata.__full_version__" is created in src/allmydata/__init__.py to be:
__full_version__ = __appname__ + '-' + str(__version__).
All the code that emits an "application version string" when describing what version of a protocol it supports (introducer server, storage server, upload helper), or when describing itself in general (introducer client), uses allmydata.__full_version__.
This fixes ticket #556 at least well enough for tahoe-1.3.0 release.
2009-02-12 00:18:16 +00:00
|
|
|
"application-version": str(allmydata.__full_version__),
|
2008-11-22 00:43:52 +00:00
|
|
|
}
|
2008-01-10 03:25:05 +00:00
|
|
|
chk_upload_helper_class = CHKUploadHelper
|
2008-04-15 01:36:27 +00:00
|
|
|
MAX_UPLOAD_STATUSES = 10
|
2008-01-10 03:25:05 +00:00
|
|
|
|
2009-08-15 20:17:37 +00:00
|
|
|
def __init__(self, basedir, storage_broker, secret_holder,
             stats_provider, history):
    """Create the working directories and initialise bookkeeping state.

    basedir: directory under which the "CHK_incoming" and "CHK_encoding"
        subdirectories are created.
    storage_broker, secret_holder: remembered and passed to every upload
        helper this object creates.
    stats_provider: may be false-ish; otherwise this object registers
        itself with it as a producer and forwards counter updates to it.
    history: may be false-ish; otherwise it is told about each new upload.
    """
    self._basedir = basedir
    self._storage_broker = storage_broker
    self._secret_holder = secret_holder

    incoming_dir = os.path.join(basedir, "CHK_incoming")
    encoding_dir = os.path.join(basedir, "CHK_encoding")
    self._chk_incoming = incoming_dir
    self._chk_encoding = encoding_dir
    for workdir in (incoming_dir, encoding_dir):
        fileutil.make_dirs(workdir)

    # uploads in progress, keyed by storage index
    self._active_uploads = {}
    # every upload helper still alive, for debugging; weak keys so this
    # table never keeps a helper alive by itself
    self._all_uploads = weakref.WeakKeyDictionary()

    self.stats_provider = stats_provider
    if stats_provider:
        stats_provider.register_producer(self)

    # local running totals, maintained whether or not a stats provider is
    # attached (count() and get_stats() both rely on this dict existing)
    self._counters = {
        "chk_upload_helper.upload_requests": 0,
        "chk_upload_helper.upload_already_present": 0,
        "chk_upload_helper.upload_need_upload": 0,
        "chk_upload_helper.resumes": 0,
        "chk_upload_helper.fetched_bytes": 0,
        "chk_upload_helper.encoded_bytes": 0,
    }
    self._history = history
|
2008-03-27 22:55:32 +00:00
|
|
|
|
2008-01-15 04:24:26 +00:00
|
|
|
def log(self, *args, **kwargs):
    """Log a message, defaulting its facility to "tahoe.helper".

    All positional and keyword arguments are forwarded to log.msg(), and
    whatever it returns is returned to the caller. A caller-supplied
    'facility' keyword is left untouched.
    """
    kwargs.setdefault('facility', "tahoe.helper")
    return log.msg(*args, **kwargs)
|
2008-01-10 03:25:05 +00:00
|
|
|
|
2008-04-14 20:18:53 +00:00
|
|
|
def count(self, key, value=1):
    """Add 'value' (default 1) to the counter named 'key'.

    The increment is forwarded to the stats provider when one is attached,
    and is always applied to the local counter table. 'key' must already
    be present in that table, otherwise KeyError is raised.
    """
    provider = self.stats_provider
    if provider:
        provider.count(key, value)
    self._counters[key] += value
|
|
|
|
|
2008-03-26 01:19:08 +00:00
|
|
|
def get_stats(self):
    """Return a dict of statistics about this helper.

    For both the incoming and the encoding directory the dict reports the
    number of entries, their total size, and the total size of entries
    whose mtime is more than 48 hours in the past. It also reports the
    number of active uploads and every locally maintained counter.
    """
    OLD = 86400*2 # 48hours
    now = time.time()
    # take both listings up front, before any file is stat()ed
    incoming_names = os.listdir(self._chk_incoming)
    encoding_names = os.listdir(self._chk_encoding)

    def _tally(dirname, names):
        # returns (entry count, total size, size of entries older than OLD)
        total_size = old_size = 0
        for name in names:
            st = os.stat(os.path.join(dirname, name))
            size = st[stat.ST_SIZE]
            total_size += size
            if now - st[stat.ST_MTIME] > OLD:
                old_size += size
        return (len(names), total_size, old_size)

    (inc_count, inc_size, inc_size_old) = _tally(self._chk_incoming,
                                                 incoming_names)
    (enc_count, enc_size, enc_size_old) = _tally(self._chk_encoding,
                                                 encoding_names)

    stats = {
        'chk_upload_helper.active_uploads': len(self._active_uploads),
        'chk_upload_helper.incoming_count': inc_count,
        'chk_upload_helper.incoming_size': inc_size,
        'chk_upload_helper.incoming_size_old': inc_size_old,
        'chk_upload_helper.encoding_count': enc_count,
        'chk_upload_helper.encoding_size': enc_size,
        'chk_upload_helper.encoding_size_old': enc_size_old,
    }
    stats.update(self._counters)
    return stats
|
2008-03-26 01:19:08 +00:00
|
|
|
|
2008-11-22 00:43:52 +00:00
|
|
|
def remote_get_version(self):
    """Return this helper's VERSION dictionary unchanged."""
    version = self.VERSION
    return version
|
|
|
|
|
2008-01-11 11:53:37 +00:00
|
|
|
def remote_upload_chk(self, storage_index):
    """Handle a request to upload the CHK file with this storage index.

    If an upload for storage_index is already active, the result of that
    upload helper's start() is returned right away. Otherwise the grid is
    checked for the file first, and the Deferred of that check is
    returned. It fires with (upload_results, None) when the file is
    already present, or with the result of start() on an existing or
    newly created upload helper when it is not. Failures are logged and
    passed through.
    """
    self.count("chk_upload_helper.upload_requests")
    upload_results = upload.UploadResults()
    t_started = time.time()
    si_s = si_b2a(storage_index)
    parent_log = self.log(format="helper: upload_chk query for SI %(si)s",
                          si=si_s)
    incoming_file = os.path.join(self._chk_incoming, si_s)
    encoding_file = os.path.join(self._chk_encoding, si_s)

    if storage_index in self._active_uploads:
        self.log("upload is currently active", parent=parent_log)
        return self._active_uploads[storage_index].start()

    def _existence_checked(already_present):
        upload_results.timings['existence_check'] = time.time() - t_started
        if already_present:
            # the necessary results are placed in the UploadResults
            self.count("chk_upload_helper.upload_already_present")
            self.log("file already found in grid", parent=parent_log)
            return (upload_results, None)

        self.count("chk_upload_helper.upload_need_upload")
        # the file is not present in the grid, by which we mean there are
        # less than 'N' shares available.
        self.log("unable to find file in the grid", parent=parent_log,
                 level=log.NOISY)
        # We need an upload helper. Another request may have created one
        # while the check was running, so look at the active uploads again.
        if storage_index not in self._active_uploads:
            self.log("creating new upload helper", parent=parent_log)
            helper = self.chk_upload_helper_class(storage_index, self,
                                                  self._storage_broker,
                                                  self._secret_holder,
                                                  incoming_file,
                                                  encoding_file,
                                                  upload_results,
                                                  parent_log)
            self._active_uploads[storage_index] = helper
            self._add_upload(helper)
        else:
            self.log("upload is currently active", parent=parent_log)
            helper = self._active_uploads[storage_index]
        return helper.start()

    def _log_failure(f):
        self.log("error while checking for chk-already-in-grid",
                 failure=f, level=log.WEIRD, parent=parent_log,
                 umid="jDtxZg")
        return f

    d = self._check_for_chk_already_in_grid(storage_index, upload_results,
                                            parent_log)
    d.addCallback(_existence_checked)
    # attached after the callback, so it also sees failures raised there
    d.addErrback(_log_failure)
    return d
|
|
|
|
|
2008-02-06 08:52:25 +00:00
|
|
|
def _check_for_chk_already_in_grid(self, storage_index, results, lp):
    """See whether the file for storage_index is already in the grid.

    Runs a CHKCheckerAndUEBFetcher and returns its Deferred. When the
    checker yields a true value, it is unpacked as
    (sharemap, ueb_data, ueb_hash), copied into 'results', and the
    Deferred fires with True. Otherwise it fires with False and 'results'
    is left alone.

    lp: parent log entry for the messages emitted here.
    """
    lp2 = self.log("doing a quick check+UEBfetch",
                   parent=lp, level=log.NOISY)
    broker = self._storage_broker
    checker = CHKCheckerAndUEBFetcher(broker.get_servers_for_psi,
                                      storage_index, lp2)
    d = checker.check()

    def _record(res):
        if not res:
            return False
        (sharemap, ueb_data, ueb_hash) = res
        self.log("found file in grid", level=log.NOISY, parent=lp)
        results.uri_extension_hash = ueb_hash
        results.sharemap = sharemap
        results.uri_extension_data = ueb_data
        # everything was already there, so nothing had to be pushed
        results.preexisting_shares = len(sharemap)
        results.pushed_shares = 0
        return True

    d.addCallback(_record)
    return d
|
2008-01-10 03:25:05 +00:00
|
|
|
|
2008-04-17 20:02:22 +00:00
|
|
|
def _add_upload(self, uh):
|
|
|
|
self._all_uploads[uh] = None
|
Overhaul IFilesystemNode handling, to simplify tests and use POLA internally.
* stop using IURI as an adapter
* pass cap strings around instead of URI instances
* move filenode/dirnode creation duties from Client to new NodeMaker class
* move other Client duties to KeyGenerator, SecretHolder, History classes
* stop passing Client reference to dirnode/filenode constructors
- pass less-powerful references instead, like StorageBroker or Uploader
* always create DirectoryNodes by wrapping a filenode (mutable for now)
* remove some specialized mock classes from unit tests
Detailed list of changes (done one at a time, then merged together)
always pass a string to create_node_from_uri(), not an IURI instance
always pass a string to IFilesystemNode constructors, not an IURI instance
stop using IURI() as an adapter, switch on cap prefix in create_node_from_uri()
client.py: move SecretHolder code out to a separate class
test_web.py: hush pyflakes
client.py: move NodeMaker functionality out into a separate object
LiteralFileNode: stop storing a Client reference
immutable Checker: remove Client reference, it only needs a SecretHolder
immutable Upload: remove Client reference, leave SecretHolder and StorageBroker
immutable Repairer: replace Client reference with StorageBroker and SecretHolder
immutable FileNode: remove Client reference
mutable.Publish: stop passing Client
mutable.ServermapUpdater: get StorageBroker in constructor, not by peeking into Client reference
MutableChecker: reference StorageBroker and History directly, not through Client
mutable.FileNode: removed unused indirection to checker classes
mutable.FileNode: remove Client reference
client.py: move RSA key generation into a separate class, so it can be passed to the nodemaker
move create_mutable_file() into NodeMaker
test_dirnode.py: stop using FakeClient mockups, use NoNetworkGrid instead. This simplifies the code, but takes longer to run (17s instead of 6s). This should come down later when other cleanups make it possible to use simpler (non-RSA) fake mutable files for dirnode tests.
test_mutable.py: clean up basedir names
client.py: move create_empty_dirnode() into NodeMaker
dirnode.py: get rid of DirectoryNode.create
remove DirectoryNode.init_from_uri, refactor NodeMaker for customization, simplify test_web's mock Client to match
stop passing Client to DirectoryNode, make DirectoryNode.create_with_mutablefile the normal DirectoryNode constructor, start removing client from NodeMaker
remove Client from NodeMaker
move helper status into History, pass History to web.Status instead of Client
test_mutable.py: fix minor typo
2009-08-15 11:02:56 +00:00
|
|
|
if self._history:
|
|
|
|
s = uh.get_upload_status()
|
|
|
|
self._history.notify_helper_upload(s)
|
2008-04-17 20:02:22 +00:00
|
|
|
|
2008-03-27 23:46:08 +00:00
|
|
|
def upload_finished(self, storage_index, size):
|
2008-04-17 20:02:22 +00:00
|
|
|
# this is called with size=0 if the upload failed
|
2008-04-14 20:18:53 +00:00
|
|
|
self.count("chk_upload_helper.encoded_bytes", size)
|
2008-04-15 01:36:27 +00:00
|
|
|
uh = self._active_uploads[storage_index]
|
2008-01-10 03:25:05 +00:00
|
|
|
del self._active_uploads[storage_index]
|
2008-04-15 01:36:27 +00:00
|
|
|
s = uh.get_upload_status()
|
|
|
|
s.set_active(False)
|