BucketCountingCrawler: rename status and state keys to use 'bucket' instead of 'share', because 'bucket' is more accurate (a single bucket may hold more than one share)

2025-04-26 13:59:59 +00:00 · 2009-02-20 21:46:06 -07:00 · 2009-02-20 21:46:06 -07:00 · 1077826357
commit 1077826357
parent d2d297f12f
5 changed files with 26 additions and 19 deletions
--- a/src/allmydata/storage/crawler.py
+++ b/src/allmydata/storage/crawler.py
@@ -336,7 +336,9 @@ class BucketCountingCrawler(ShareCrawler):
    which I am providing storage. The actual number of files+directories in
    the full grid is probably higher (especially when there are more servers
    than 'N', the number of generated shares), because some files+directories
-    will have shares on other servers instead of me.
+    will have shares on other servers instead of me. Also note that the
+    number of buckets will differ from the number of shares in small grids,
+    when more than one share is placed on a single server.
    """

    minimum_cycle_time = 60*60 # we don't need this more than once an hour
@@ -346,13 +348,13 @@ class BucketCountingCrawler(ShareCrawler):
        self.num_sample_prefixes = num_sample_prefixes

    def add_initial_state(self):
-        # ["share-counts"][cyclenum][prefix] = number
+        # ["bucket-counts"][cyclenum][prefix] = number
        # ["last-complete-cycle"] = cyclenum # maintained by base class
-        # ["last-complete-share-count"] = number
+        # ["last-complete-bucket-count"] = number
        # ["storage-index-samples"][prefix] = (cyclenum,
        #                                      list of SI strings (base32))
-        self.state.setdefault("share-counts", {})
-        self.state.setdefault("last-complete-share-count", None)
+        self.state.setdefault("bucket-counts", {})
+        self.state.setdefault("last-complete-bucket-count", None)
        self.state.setdefault("storage-index-samples", {})

    def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice):
@@ -360,22 +362,22 @@ class BucketCountingCrawler(ShareCrawler):
        # the individual buckets. We'll save state after each one. On my
        # laptop, a mostly-empty storage server can process about 70
        # prefixdirs in a 1.0s slice.
-        if cycle not in self.state["share-counts"]:
-            self.state["share-counts"][cycle] = {}
-        self.state["share-counts"][cycle][prefix] = len(buckets)
+        if cycle not in self.state["bucket-counts"]:
+            self.state["bucket-counts"][cycle] = {}
+        self.state["bucket-counts"][cycle][prefix] = len(buckets)
        if prefix in self.prefixes[:self.num_sample_prefixes]:
            self.state["storage-index-samples"][prefix] = (cycle, buckets)

    def finished_cycle(self, cycle):
-        last_counts = self.state["share-counts"].get(cycle, [])
+        last_counts = self.state["bucket-counts"].get(cycle, [])
        if len(last_counts) == len(self.prefixes):
            # great, we have a whole cycle.
            num_buckets = sum(last_counts.values())
-            self.state["last-complete-share-count"] = (cycle, num_buckets)
+            self.state["last-complete-bucket-count"] = (cycle, num_buckets)
            # get rid of old counts
-            for old_cycle in list(self.state["share-counts"].keys()):
+            for old_cycle in list(self.state["bucket-counts"].keys()):
                if old_cycle != cycle:
-                    del self.state["share-counts"][old_cycle]
+                    del self.state["bucket-counts"][old_cycle]
        # get rid of old samples too
        for prefix in list(self.state["storage-index-samples"].keys()):
            old_cycle,buckets = self.state["storage-index-samples"][prefix]
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -170,6 +170,11 @@ class StorageServer(service.MultiService, Referenceable):
            # os.statvfs is available only on unix
            pass
        stats["storage_server.accepting_immutable_shares"] = int(writeable)
+        s = self.bucket_counter.get_state()
+        bucket_count = s.get("last-complete-bucket-count")
+        if bucket_count:
+            cycle, count = bucket_count
+            stats["storage_server.total_bucket_count"] = count
        return stats


--- a/src/allmydata/test/test_storage.py
+++ b/src/allmydata/test/test_storage.py
@@ -1376,7 +1376,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin):
            state = ss.bucket_counter.state
            self.failUnlessEqual(state["last-complete-prefix"],
                                 ss.bucket_counter.prefixes[0])
-            state["share-counts"][-12] = {}
+            state["bucket-counts"][-12] = {}
            state["storage-index-samples"]["bogusprefix!"] = (-12, [])
            ss.bucket_counter.save_state()
        d.addCallback(_after_first_prefix)
@@ -1388,7 +1388,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin):
        def _check2(ignored):
            ss.bucket_counter.cpu_slice = orig_cpu_slice
            s = ss.bucket_counter.get_state()
-            self.failIf(-12 in s["share-counts"], s["share-counts"].keys())
+            self.failIf(-12 in s["bucket-counts"], s["bucket-counts"].keys())
            self.failIf("bogusprefix!" in s["storage-index-samples"],
                        s["storage-index-samples"].keys())
        d.addCallback(_check2)
--- a/src/allmydata/web/storage.py
+++ b/src/allmydata/web/storage.py
@@ -63,12 +63,12 @@ class StorageStatus(rend.Page):
        d.setdefault("disk_avail", None)
        return d

-    def data_last_complete_share_count(self, ctx, data):
+    def data_last_complete_bucket_count(self, ctx, data):
        s = self.storage.bucket_counter.get_state()
-        lcsc = s.get("last-complete-share-count")
-        if lcsc is None:
+        lcbc = s.get("last-complete-bucket-count")
+        if lcbc is None:
            return "Not computed yet"
-        cycle, count = lcsc
+        cycle, count = lcbc
        return count

    def render_count_crawler_status(self, ctx, storage):
--- a/src/allmydata/web/storage_status.xhtml
+++ b/src/allmydata/web/storage_status.xhtml
@@ -38,7 +38,7 @@

  <ul>
    <li>Total buckets:
-       <span n:render="string" n:data="last_complete_share_count" />
+       <span n:render="string" n:data="last_complete_bucket_count" />
       (the number of files and directories for which this server is holding
        a share)
      <ul>