mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-04-16 23:18:58 +00:00
crawler: add get_progress, clean up get_state
This commit is contained in:
parent
2e45619844
commit
73e05bf967
@ -75,12 +75,43 @@ class ShareCrawler(service.MultiService):
|
||||
self.current_sleep_time = None
|
||||
self.next_wake_time = None
|
||||
|
||||
def get_progress(self):
    """I return information about how much progress the crawler is
    making. My return value is a dictionary. The primary key is
    'cycle-in-progress': True if the crawler is currently traversing the
    shares, False if it is idle between cycles.

    If cycle-in-progress is True, the following keys will be present::

     cycle-complete-percentage': float, from 0.0 to 100.0, indicating how
                                 far the crawler has progressed through
                                 the current cycle
     remaining-sleep-time: float, seconds from now when we do more work

    If cycle-in-progress is False, the following keys are available::

     next-crawl-time: float, seconds-since-epoch when next crawl starts
     remaining-wait-time: float, seconds from now when next crawl starts
    """
    now = time.time()
    progress = {}
    if self.state["current-cycle"] is None:
        # Idle between cycles: we must be in the inter-cycle sleep.
        assert self.sleeping_between_cycles
        progress["cycle-in-progress"] = False
        progress["next-crawl-time"] = self.next_wake_time
        progress["remaining-wait-time"] = self.next_wake_time - now
    else:
        progress["cycle-in-progress"] = True
        # Fraction of prefix buckets finished so far, as a percentage.
        done = 100.0 * self.last_complete_prefix_index / len(self.prefixes)
        progress["cycle-complete-percentage"] = done
        progress["remaining-sleep-time"] = self.next_wake_time - now
    return progress
|
||||
|
||||
def get_state(self):
    """I return the current state of the crawler. This is a copy of my
    state dictionary, plus the following keys::

     current-sleep-time: float, duration of our current sleep
     next-wake-time: float, seconds-since-epoch of when we will next wake

    If we are not currently sleeping (i.e. get_state() was called from
    inside the process_prefixdir, process_bucket, or finished_cycle()
    methods), these two keys will be None.
    """
    # Shallow copy only: callers share the nested values, so they must
    # not mutate what they get back.
    snapshot = self.state.copy()
    snapshot["current-sleep-time"] = self.current_sleep_time
    snapshot["next-wake-time"] = self.next_wake_time
    return snapshot
|
||||
|
||||
def load_state(self):
|
||||
|
@ -31,6 +31,7 @@ class PacedCrawler(ShareCrawler):
|
||||
self.countdown = 6
|
||||
self.all_buckets = []
|
||||
self.finished_d = defer.Deferred()
|
||||
self.yield_cb = None
|
||||
def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32):
|
||||
self.all_buckets.append(storage_index_b32)
|
||||
self.countdown -= 1
|
||||
@ -39,6 +40,8 @@ class PacedCrawler(ShareCrawler):
|
||||
self.cpu_slice = -1.0
|
||||
def yielding(self, sleep_time):
    """Hook invoked when the crawler yields the CPU.

    Once the first paced yield has happened, widen cpu_slice so the rest
    of the cycle runs without further interruption, then notify the test
    via yield_cb if one is registered.
    """
    self.cpu_slice = 500
    callback = self.yield_cb
    if callback:
        callback()
|
||||
def finished_cycle(self, cycle):
    # Fire finished_d via the 'eventual' helper — presumably on a later
    # reactor turn, so the Deferred's callbacks run outside the
    # crawler's own call stack. NOTE(review): confirm eventual.eventually
    # semantics against its module docs.
    eventual.eventually(self.finished_d.callback, None)
|
||||
|
||||
@ -173,6 +176,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
|
||||
# that should stop in the middle of one of the buckets.
|
||||
c.cpu_slice = PacedCrawler.cpu_slice
|
||||
self.failUnlessEqual(len(c.all_buckets), 6)
|
||||
|
||||
c.start_current_prefix(time.time()) # finish it
|
||||
self.failUnlessEqual(len(sis), len(c.all_buckets))
|
||||
self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))
|
||||
@ -252,18 +256,53 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin):
|
||||
|
||||
statefile = os.path.join(self.basedir, "statefile")
|
||||
c = PacedCrawler(ss, statefile)
|
||||
|
||||
did_check_progress = [False]
|
||||
def check_progress():
|
||||
c.yield_cb = None
|
||||
try:
|
||||
p = c.get_progress()
|
||||
self.failUnlessEqual(p["cycle-in-progress"], True)
|
||||
pct = p["cycle-complete-percentage"]
|
||||
# after 6 buckets, we happen to be at 76.17% complete. As
|
||||
# long as we create shares in deterministic order, this will
|
||||
# continue to be true.
|
||||
self.failUnlessEqual(int(pct), 76)
|
||||
left = p["remaining-sleep-time"]
|
||||
self.failUnless(isinstance(left, float), left)
|
||||
self.failUnless(left > 0.0, left)
|
||||
except Exception, e:
|
||||
did_check_progress[0] = e
|
||||
else:
|
||||
did_check_progress[0] = True
|
||||
c.yield_cb = check_progress
|
||||
|
||||
c.setServiceParent(self.s)
|
||||
# that should get through 6 buckets, pause for a little while, then
|
||||
# resume
|
||||
# that should get through 6 buckets, pause for a little while (and
|
||||
# run check_progress()), then resume
|
||||
|
||||
d = c.finished_d
|
||||
def _check(ignored):
|
||||
if did_check_progress[0] is not True:
|
||||
raise did_check_progress[0]
|
||||
self.failUnless(did_check_progress[0])
|
||||
self.failUnlessEqual(sorted(sis), sorted(c.all_buckets))
|
||||
# at this point, the crawler should be sitting in the inter-cycle
|
||||
# timer, which should be pegged at the minimum cycle time
|
||||
self.failUnless(c.timer)
|
||||
self.failUnless(c.sleeping_between_cycles)
|
||||
self.failUnlessEqual(c.current_sleep_time, c.minimum_cycle_time)
|
||||
|
||||
p = c.get_progress()
|
||||
self.failUnlessEqual(p["cycle-in-progress"], False)
|
||||
naptime = p["remaining-wait-time"]
|
||||
self.failUnless(isinstance(naptime, float), naptime)
|
||||
# min-cycle-time is 300, so this is basically testing that it took
|
||||
# less than 290s to crawl
|
||||
self.failUnless(naptime > 10.0, naptime)
|
||||
soon = p["next-crawl-time"] - time.time()
|
||||
self.failUnless(soon > 10.0, soon)
|
||||
|
||||
d.addCallback(_check)
|
||||
return d
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user