misc/spacetime: use async polling so we can add a 60-second timeout, add an index to the 'url' Axiom column for 2x speedup

2025-04-07 10:56:49 +00:00 · 2008-09-30 16:34:48 -07:00 · 2008-09-30 16:34:48 -07:00 · 3aba70bbcf
commit 3aba70bbcf
parent cd26f58305
2 changed files with 9 additions and 5 deletions
--- a/misc/spacetime/diskwatcher.py
+++ b/misc/spacetime/diskwatcher.py
@@ -7,7 +7,7 @@ from axiom.item import Item
 from axiom.attributes import text, integer, timestamp

 class Sample(Item):
-    url = text()
+    url = text(indexed=True)
    when = timestamp(indexed=True)
    used = integer()
    avail = integer()
--- a/misc/spacetime/diskwatcher.tac
+++ b/misc/spacetime/diskwatcher.tac
@@ -105,17 +105,19 @@ class DiskWatcher(service.MultiService, resource.Resource):

    def poll(self):
        log.msg("polling..")
-        return self.poll_synchronous()
-        #return self.poll_asynchronous()
+        #return self.poll_synchronous()
+        return self.poll_asynchronous()

    def poll_asynchronous(self):
        # this didn't actually seem to work any better than poll_synchronous:
        # logs are more noisy, and I got frequent DNS failures. But with a
-        # lot of servers to query, this is probably the better way to go.
+        # lot of servers to query, this is probably the better way to go. A
+        # significant advantage of this approach is that we can use a
+        # timeout= argument to tolerate hanging servers.
        dl = []
        for url in self.get_urls():
            when = extime.Time()
-            d = client.getPage(url)
+            d = client.getPage(url, timeout=60)
            d.addCallback(self.got_response, when, url)
            dl.append(d)
        d = defer.DeferredList(dl)
@@ -132,6 +134,8 @@ class DiskWatcher(service.MultiService, resource.Resource):
            attempts += 1
            try:
                when = extime.Time()
+                # if a server accepts the connection and then hangs, this
+                # will block forever
                data_json = urllib.urlopen(url).read()
                self.got_response(data_json, when, url)
                fetched += 1