Attempt to avoid the startup double-scan and related problems

Jean-Paul Calderone 2019-03-18 14:24:34 -04:00
parent c01c85522d
commit d1a62b1c99


@@ -950,6 +950,10 @@ class QueueMixin(HookMixin):
"""
action = PROCESSING_LOOP(**self._log_fields)
# Keep track of the fact that we're just starting up and we can and
# should skip a collective scan for one iteration.
self._startup_iteration = True
# Note that we don't put the processing iterations into the logging
# action because we expect this loop to run for the whole lifetime of
# the process. The tooling for dealing with incomplete action trees
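For background on the comment just above: an Eliot action only records its end message when its block exits, so nesting each processing iteration inside an action that lives for the whole process would leave that action permanently unfinished in the logs. The sketch below is illustrative only (the action types are made up and this is not magic-folder code); it contrasts the nesting being avoided with per-iteration top-level actions.

    from eliot import start_action

    def run_nested_forever(do_one_iteration):
        # Avoided: the outer action never ends, so every iteration logged
        # beneath it belongs to an incomplete action tree.
        with start_action(action_type="example:processing-loop"):
            while True:
                with start_action(action_type="example:iteration"):
                    do_one_iteration()

    def run_flat_forever(do_one_iteration):
        # Preferred: each iteration is its own action and can complete.
        while True:
            with start_action(action_type="example:iteration"):
                do_one_iteration()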
@@ -973,8 +977,21 @@ class QueueMixin(HookMixin):
         with action.context():
             d = DeferredContext(defer.Deferred())
-            # adds items to our deque
-            d.addCallback(lambda ignored: self._perform_scan())
+            if self._startup_iteration:
+                # During startup we scanned the collective for items to
+                # download. We do not need to perform another scan before
+                # processing our work queue. More importantly, the logic for
+                # determining which items to download is *not correct* in the
+                # case where two scans are performed with no intermediate
+                # emptying of the work queue. Therefore, skip the scan in the
+                # first processing iteration. Either there will be work in
+                # the queue from the initial scan or not. Either way, when we
+                # get here again on the next iteration, we'll go the other way
+                # and perform a scan.
+                self._startup_iteration = False
+            else:
+                # adds items to our deque
+                d.addCallback(lambda ignored: self._perform_scan())
             # process anything in our queue
             d.addCallback(lambda ignored: self._process_deque())
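Taken together, the two hunks above boil down to a small pattern: a one-shot startup flag decides whether an iteration scans before it processes the queue. A minimal sketch of that pattern follows; the class and callable names are invented for illustration and the Eliot/DeferredContext plumbing is left out.

    from twisted.internet import defer

    class ScanSkippingLoop(object):
        """
        Illustrative only: the first iteration trusts the work queued by the
        startup scan; every later iteration scans and then processes.
        """
        def __init__(self, scan, process):
            # ``scan`` and ``process`` are assumed to return Deferreds.
            self._scan = scan
            self._process = process
            self._startup_iteration = True

        def one_iteration(self):
            d = defer.succeed(None)
            if self._startup_iteration:
                # Scanning again before the queue drains would break the
                # should_download logic, so skip exactly one scan.
                self._startup_iteration = False
            else:
                d.addCallback(lambda ignored: self._scan())
            d.addCallback(lambda ignored: self._process())
            return d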
@@ -1854,6 +1871,7 @@ class Downloader(QueueMixin, WriteFileMixin):
     @eliotutil.log_call_deferred(SCAN_REMOTE_COLLECTIVE.action_type)
     def _scan_remote_collective(self, scan_self=False):
+        precondition(not self._deque, "Items in _deque invalidate should_download logic")
         scan_batch = {}  # path -> [(filenode, metadata)]
         d = DeferredContext(self._collective_dirnode.list())
         def scan_collective(dirmap):
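The one added line in this last hunk turns the invariant the loop now relies on into a runtime check: a remote collective scan may only begin with an empty work queue. A rough equivalent using a plain assertion instead of allmydata's `precondition` helper (the free function and its arguments are hypothetical):

    from collections import deque

    def scan_remote_collective(work_queue, list_collective):
        # Guard: pending downloads would invalidate the should_download logic.
        assert not work_queue, "Items in _deque invalidate should_download logic"
        return list_collective()

    # A scan attempted while work is still queued fails fast:
    pending = deque(["unsynced-item"])
    try:
        scan_remote_collective(pending, lambda: [])
    except AssertionError as error:
        print(error)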