From 3b69df36b0604a0981c92a4d4c0da0611bc04535 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 23 Oct 2021 15:38:51 -0600 Subject: [PATCH 01/35] crawler: pickle -> json --- src/allmydata/storage/crawler.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index bd4f4f432..d7dee78dc 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -11,15 +11,12 @@ from __future__ import print_function from future.utils import PY2, PY3 if PY2: - # We don't import bytes, object, dict, and list just in case they're used, - # so as not to create brittle pickles with random magic objects. - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401 + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min -import os, time, struct -try: - import cPickle as pickle -except ImportError: - import pickle # type: ignore +import os +import time +import json +import struct from twisted.internet import reactor from twisted.application import service from allmydata.storage.common import si_b2a @@ -214,7 +211,7 @@ class ShareCrawler(service.MultiService): # None if we are sleeping between cycles try: with open(self.statefile, "rb") as f: - state = pickle.load(f) + state = json.load(f) except Exception: state = {"version": 1, "last-cycle-finished": None, @@ -252,9 +249,7 @@ class ShareCrawler(service.MultiService): self.state["last-complete-prefix"] = last_complete_prefix tmpfile = self.statefile + ".tmp" with open(tmpfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). - pickle.dump(self.state, f, protocol=2) + json.dump(self.state, f) fileutil.move_into_place(tmpfile, self.statefile) def startService(self): From 758dcea2d4a73d472050d5f21bc84217a71802b8 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 23 Oct 2021 16:19:27 -0600 Subject: [PATCH 02/35] news --- newsfragments/3825.security | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 newsfragments/3825.security diff --git a/newsfragments/3825.security b/newsfragments/3825.security new file mode 100644 index 000000000..b16418d2b --- /dev/null +++ b/newsfragments/3825.security @@ -0,0 +1,5 @@ +The lease-checker now uses JSON instead of pickle to serialize its state. + +Once you have run this version the lease state files will be stored in JSON +and an older version of the software won't load them (it simply won't notice +them so it will appear to have never run). From f7b385f9544f48ebfc7da69b314d3d1dda30ee2c Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 24 Oct 2021 22:27:59 -0600 Subject: [PATCH 03/35] play nice with subclasses --- src/allmydata/storage/crawler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index d7dee78dc..b931f1ab5 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -248,8 +248,12 @@ class ShareCrawler(service.MultiService): last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix tmpfile = self.statefile + ".tmp" + + # Note: we use self.get_state() here because e.g + # LeaseCheckingCrawler stores non-JSON-able state in + # self.state() but converts it in self.get_state() with open(tmpfile, "wb") as f: - json.dump(self.state, f) + json.dump(self.get_state(), f) fileutil.move_into_place(tmpfile, self.statefile) def startService(self): From bb70e00065ab42f0e9f8faabff50d587532f49f7 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 24 Oct 2021 23:47:24 -0600 Subject: [PATCH 04/35] Make internal state JSON-able for lease-crawler --- src/allmydata/storage/expirer.py | 53 +++++++++++++------------- src/allmydata/test/test_storage_web.py | 32 ++++++++-------- src/allmydata/web/storage.py | 11 +++--- 3 files changed, 49 insertions(+), 47 deletions(-) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 7c6cd8218..4513dadb2 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -5,10 +5,11 @@ from __future__ import unicode_literals from future.utils import PY2 if PY2: - # We omit anything that might end up in pickle, just in case. - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401 - -import time, os, pickle, struct + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import json +import time +import os +import struct from allmydata.storage.crawler import ShareCrawler from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ @@ -95,9 +96,7 @@ class LeaseCheckingCrawler(ShareCrawler): if not os.path.exists(self.historyfile): history = {} # cyclenum -> dict with open(self.historyfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). - pickle.dump(history, f, protocol=2) + json.dump(history, f) def create_empty_cycle_dict(self): recovered = self.create_empty_recovered_dict() @@ -142,7 +141,7 @@ class LeaseCheckingCrawler(ShareCrawler): struct.error): twlog.msg("lease-checker error processing %s" % sharefile) twlog.err() - which = (storage_index_b32, shnum) + which = [storage_index_b32, shnum] self.state["cycle-to-date"]["corrupt-shares"].append(which) wks = (1, 1, 1, "unknown") would_keep_shares.append(wks) @@ -212,7 +211,7 @@ class LeaseCheckingCrawler(ShareCrawler): num_valid_leases_configured += 1 so_far = self.state["cycle-to-date"] - self.increment(so_far["leases-per-share-histogram"], num_leases, 1) + self.increment(so_far["leases-per-share-histogram"], str(num_leases), 1) self.increment_space("examined", s, sharetype) would_keep_share = [1, 1, 1, sharetype] @@ -291,12 +290,14 @@ class LeaseCheckingCrawler(ShareCrawler): start = self.state["current-cycle-start-time"] now = time.time() - h["cycle-start-finish-times"] = (start, now) + h["cycle-start-finish-times"] = [start, now] h["expiration-enabled"] = self.expiration_enabled - h["configured-expiration-mode"] = (self.mode, - self.override_lease_duration, - self.cutoff_date, - self.sharetypes_to_expire) + h["configured-expiration-mode"] = [ + self.mode, + self.override_lease_duration, + self.cutoff_date, + self.sharetypes_to_expire, + ] s = self.state["cycle-to-date"] @@ -315,15 +316,13 @@ class LeaseCheckingCrawler(ShareCrawler): h["space-recovered"] = s["space-recovered"].copy() with open(self.historyfile, "rb") as f: - history = pickle.load(f) - history[cycle] = h + history = json.load(f) + history[str(cycle)] = h while len(history) > 10: - oldcycles = sorted(history.keys()) - del history[oldcycles[0]] + oldcycles = sorted(int(k) for k in history.keys()) + del history[str(oldcycles[0])] with open(self.historyfile, "wb") as f: - # Newer protocols won't work in Python 2; when it is dropped, - # protocol v4 can be used (added in Python 3.4). - pickle.dump(history, f, protocol=2) + json.dump(history, f) def get_state(self): """In addition to the crawler state described in @@ -393,7 +392,7 @@ class LeaseCheckingCrawler(ShareCrawler): state = ShareCrawler.get_state(self) # does a shallow copy with open(self.historyfile, "rb") as f: - history = pickle.load(f) + history = json.load(f) state["history"] = history if not progress["cycle-in-progress"]: @@ -406,10 +405,12 @@ class LeaseCheckingCrawler(ShareCrawler): lah = so_far["lease-age-histogram"] so_far["lease-age-histogram"] = self.convert_lease_age_histogram(lah) so_far["expiration-enabled"] = self.expiration_enabled - so_far["configured-expiration-mode"] = (self.mode, - self.override_lease_duration, - self.cutoff_date, - self.sharetypes_to_expire) + so_far["configured-expiration-mode"] = [ + self.mode, + self.override_lease_duration, + self.cutoff_date, + self.sharetypes_to_expire, + ] so_far_sr = so_far["space-recovered"] remaining_sr = {} diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 38e380223..b9fa548d3 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -376,7 +376,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) self.failUnlessEqual(lah, [ (0.0, DAY, 1) ] ) - self.failUnlessEqual(so_far["leases-per-share-histogram"], {1: 1}) + self.failUnlessEqual(so_far["leases-per-share-histogram"], {"1": 1}) self.failUnlessEqual(so_far["corrupt-shares"], []) sr1 = so_far["space-recovered"] self.failUnlessEqual(sr1["examined-buckets"], 1) @@ -427,9 +427,9 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failIf("cycle-to-date" in s) self.failIf("estimated-remaining-cycle" in s) self.failIf("estimated-current-cycle" in s) - last = s["history"][0] + last = s["history"]["0"] self.failUnlessIn("cycle-start-finish-times", last) - self.failUnlessEqual(type(last["cycle-start-finish-times"]), tuple) + self.failUnlessEqual(type(last["cycle-start-finish-times"]), list) self.failUnlessEqual(last["expiration-enabled"], False) self.failUnlessIn("configured-expiration-mode", last) @@ -437,9 +437,9 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): lah = last["lease-age-histogram"] self.failUnlessEqual(type(lah), list) self.failUnlessEqual(len(lah), 1) - self.failUnlessEqual(lah, [ (0.0, DAY, 6) ] ) + self.failUnlessEqual(lah, [ [0.0, DAY, 6] ] ) - self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) + self.failUnlessEqual(last["leases-per-share-histogram"], {"1": 2, "2": 2}) self.failUnlessEqual(last["corrupt-shares"], []) rec = last["space-recovered"] @@ -587,12 +587,12 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], - ("age", 2000, None, ("mutable", "immutable"))) - self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2}) + ["age", 2000, None, ["mutable", "immutable"]]) + self.failUnlessEqual(last["leases-per-share-histogram"], {"1": 2, "2": 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) @@ -731,14 +731,14 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.failUnlessEqual(count_leases(mutable_si_3), 1) s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] self.failUnlessEqual(last["expiration-enabled"], True) self.failUnlessEqual(last["configured-expiration-mode"], - ("cutoff-date", None, then, - ("mutable", "immutable"))) + ["cutoff-date", None, then, + ["mutable", "immutable"]]) self.failUnlessEqual(last["leases-per-share-histogram"], - {1: 2, 2: 2}) + {"1": 2, "2": 2}) rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 4) @@ -924,8 +924,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): s = lc.get_state() h = s["history"] self.failUnlessEqual(len(h), 10) - self.failUnlessEqual(max(h.keys()), 15) - self.failUnlessEqual(min(h.keys()), 6) + self.failUnlessEqual(max(int(k) for k in h.keys()), 15) + self.failUnlessEqual(min(int(k) for k in h.keys()), 6) d.addCallback(_check) return d @@ -1014,7 +1014,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): def _check(ignored): s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] rec = last["space-recovered"] self.failUnlessEqual(rec["configured-buckets"], 4) self.failUnlessEqual(rec["configured-shares"], 4) @@ -1110,7 +1110,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): def _after_first_cycle(ignored): s = lc.get_state() - last = s["history"][0] + last = s["history"]["0"] rec = last["space-recovered"] self.failUnlessEqual(rec["examined-buckets"], 5) self.failUnlessEqual(rec["examined-shares"], 3) diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py index f2f021a15..e568d5ed5 100644 --- a/src/allmydata/web/storage.py +++ b/src/allmydata/web/storage.py @@ -256,8 +256,8 @@ class StorageStatusElement(Element): if so_far["corrupt-shares"]: add("Corrupt shares:", - T.ul( (T.li( ["SI %s shnum %d" % corrupt_share - for corrupt_share in so_far["corrupt-shares"] ] + T.ul( (T.li( ["SI %s shnum %d" % (si, shnum) + for si, shnum in so_far["corrupt-shares"] ] )))) return tag("Current cycle:", p) @@ -267,7 +267,8 @@ class StorageStatusElement(Element): h = lc.get_state()["history"] if not h: return "" - last = h[max(h.keys())] + biggest = str(max(int(k) for k in h.keys())) + last = h[biggest] start, end = last["cycle-start-finish-times"] tag("Last complete cycle (which took %s and finished %s ago)" @@ -290,8 +291,8 @@ class StorageStatusElement(Element): if last["corrupt-shares"]: add("Corrupt shares:", - T.ul( (T.li( ["SI %s shnum %d" % corrupt_share - for corrupt_share in last["corrupt-shares"] ] + T.ul( (T.li( ["SI %s shnum %d" % (si, shnum) + for si, shnum in last["corrupt-shares"] ] )))) return tag(p) From fa6950f08dc054ec770af1c402e8eb5d3c581b3e Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 12:18:28 -0600 Subject: [PATCH 05/35] an old pickle-format lease-checker state file --- src/allmydata/test/data/lease_checker.state | 545 ++++++++++++++++++++ 1 file changed, 545 insertions(+) create mode 100644 src/allmydata/test/data/lease_checker.state diff --git a/src/allmydata/test/data/lease_checker.state b/src/allmydata/test/data/lease_checker.state new file mode 100644 index 000000000..b32554434 --- /dev/null +++ b/src/allmydata/test/data/lease_checker.state @@ -0,0 +1,545 @@ +(dp1 +S'last-complete-prefix' +p2 +NsS'version' +p3 +I1 +sS'current-cycle-start-time' +p4 +F1635003106.611748 +sS'last-cycle-finished' +p5 +I312 +sS'cycle-to-date' +p6 +(dp7 +Vleases-per-share-histogram +p8 +(dp9 +I1 +I36793 +sI2 +I1 +ssVspace-recovered +p10 +(dp11 +Vexamined-buckets-immutable +p12 +I17183 +sVconfigured-buckets-mutable +p13 +I0 +sVexamined-shares-mutable +p14 +I1796 +sVoriginal-shares-mutable +p15 +I1563 +sVconfigured-buckets-immutable +p16 +I0 +sVoriginal-shares-immutable +p17 +I27926 +sVoriginal-diskbytes-immutable +p18 +I431149056 +sVexamined-shares-immutable +p19 +I34998 +sVoriginal-buckets +p20 +I14661 +sVactual-shares-immutable +p21 +I0 +sVconfigured-shares +p22 +I0 +sVoriginal-buckets-immutable +p23 +I13761 +sVactual-diskbytes +p24 +I4096 +sVactual-shares-mutable +p25 +I0 +sVconfigured-buckets +p26 +I1 +sVexamined-buckets-unknown +p27 +I14 +sVactual-sharebytes +p28 +I0 +sVoriginal-shares +p29 +I29489 +sVoriginal-sharebytes +p30 +I312664812 +sVexamined-sharebytes-immutable +p31 +I383801602 +sVactual-shares +p32 +I0 +sVactual-sharebytes-immutable +p33 +I0 +sVoriginal-diskbytes +p34 +I441643008 +sVconfigured-diskbytes-mutable +p35 +I0 +sVconfigured-sharebytes-immutable +p36 +I0 +sVconfigured-shares-mutable +p37 +I0 +sVactual-diskbytes-immutable +p38 +I0 +sVconfigured-diskbytes-immutable +p39 +I0 +sVoriginal-diskbytes-mutable +p40 +I10489856 +sVactual-sharebytes-mutable +p41 +I0 +sVconfigured-sharebytes +p42 +I0 +sVexamined-shares +p43 +I36794 +sVactual-diskbytes-mutable +p44 +I0 +sVactual-buckets +p45 +I1 +sVoriginal-buckets-mutable +p46 +I899 +sVconfigured-sharebytes-mutable +p47 +I0 +sVexamined-sharebytes +p48 +I390369660 +sVoriginal-sharebytes-immutable +p49 +I308125753 +sVoriginal-sharebytes-mutable +p50 +I4539059 +sVactual-buckets-mutable +p51 +I0 +sVexamined-diskbytes-mutable +p52 +I9154560 +sVexamined-buckets-mutable +p53 +I1043 +sVconfigured-shares-immutable +p54 +I0 +sVexamined-diskbytes +p55 +I476598272 +sVactual-buckets-immutable +p56 +I0 +sVexamined-sharebytes-mutable +p57 +I6568058 +sVexamined-buckets +p58 +I18241 +sVconfigured-diskbytes +p59 +I4096 +sVexamined-diskbytes-immutable +p60 +I467443712 +ssVcorrupt-shares +p61 +(lp62 +(V2dn6xnlnsqwtnapwxfdivpm3s4 +p63 +I4 +tp64 +a(g63 +I1 +tp65 +a(V2rrzthwsrrxolevmwdvbdy3rqi +p66 +I4 +tp67 +a(g66 +I1 +tp68 +a(V2skfngcto6h7eqmn4uo7ntk3ne +p69 +I4 +tp70 +a(g69 +I1 +tp71 +a(V32d5swqpqx2mwix7xmqzvhdwje +p72 +I4 +tp73 +a(g72 +I1 +tp74 +a(V5mmayp66yflmpon3o6unsnbaca +p75 +I4 +tp76 +a(g75 +I1 +tp77 +a(V6ixhpvbtre7fnrl6pehlrlflc4 +p78 +I4 +tp79 +a(g78 +I1 +tp80 +a(Vewzhvswjsz4vp2bqkb6mi3bz2u +p81 +I4 +tp82 +a(g81 +I1 +tp83 +a(Vfu7pazf6ogavkqj6z4q5qqex3u +p84 +I4 +tp85 +a(g84 +I1 +tp86 +a(Vhbyjtqvpcimwxiyqbcbbdn2i4a +p87 +I4 +tp88 +a(g87 +I1 +tp89 +a(Vpmcjbdkbjdl26k3e6yja77femq +p90 +I4 +tp91 +a(g90 +I1 +tp92 +a(Vr6swof4v2uttbiiqwj5pi32cm4 +p93 +I4 +tp94 +a(g93 +I1 +tp95 +a(Vt45v5akoktf53evc2fi6gwnv6y +p96 +I4 +tp97 +a(g96 +I1 +tp98 +a(Vy6zb4faar3rdvn3e6pfg4wlotm +p99 +I4 +tp100 +a(g99 +I1 +tp101 +a(Vz3yghutvqoqbchjao4lndnrh3a +p102 +I4 +tp103 +a(g102 +I1 +tp104 +asVlease-age-histogram +p105 +(dp106 +(I45619200 +I45705600 +tp107 +I4 +s(I12441600 +I12528000 +tp108 +I78 +s(I11923200 +I12009600 +tp109 +I89 +s(I33436800 +I33523200 +tp110 +I7 +s(I37411200 +I37497600 +tp111 +I4 +s(I38361600 +I38448000 +tp112 +I5 +s(I4665600 +I4752000 +tp113 +I256 +s(I11491200 +I11577600 +tp114 +I20 +s(I10713600 +I10800000 +tp115 +I183 +s(I42076800 +I42163200 +tp116 +I4 +s(I47865600 +I47952000 +tp117 +I7 +s(I3110400 +I3196800 +tp118 +I328 +s(I5788800 +I5875200 +tp119 +I954 +s(I9331200 +I9417600 +tp120 +I12 +s(I7430400 +I7516800 +tp121 +I7228 +s(I1555200 +I1641600 +tp122 +I492 +s(I37929600 +I38016000 +tp123 +I3 +s(I38880000 +I38966400 +tp124 +I3 +s(I12528000 +I12614400 +tp125 +I193 +s(I10454400 +I10540800 +tp126 +I1239 +s(I11750400 +I11836800 +tp127 +I7 +s(I950400 +I1036800 +tp128 +I4435 +s(I44409600 +I44496000 +tp129 +I13 +s(I12787200 +I12873600 +tp130 +I218 +s(I10368000 +I10454400 +tp131 +I117 +s(I3283200 +I3369600 +tp132 +I86 +s(I7516800 +I7603200 +tp133 +I993 +s(I42336000 +I42422400 +tp134 +I33 +s(I46310400 +I46396800 +tp135 +I1 +s(I39052800 +I39139200 +tp136 +I51 +s(I7603200 +I7689600 +tp137 +I2004 +s(I10540800 +I10627200 +tp138 +I16 +s(I36374400 +I36460800 +tp139 +I3 +s(I3369600 +I3456000 +tp140 +I79 +s(I12700800 +I12787200 +tp141 +I25 +s(I4838400 +I4924800 +tp142 +I386 +s(I10972800 +I11059200 +tp143 +I122 +s(I8812800 +I8899200 +tp144 +I57 +s(I38966400 +I39052800 +tp145 +I61 +s(I3196800 +I3283200 +tp146 +I628 +s(I9244800 +I9331200 +tp147 +I73 +s(I30499200 +I30585600 +tp148 +I5 +s(I12009600 +I12096000 +tp149 +I329 +s(I12960000 +I13046400 +tp150 +I8 +s(I12614400 +I12700800 +tp151 +I210 +s(I3801600 +I3888000 +tp152 +I32 +s(I10627200 +I10713600 +tp153 +I43 +s(I44928000 +I45014400 +tp154 +I2 +s(I8208000 +I8294400 +tp155 +I38 +s(I8640000 +I8726400 +tp156 +I32 +s(I7344000 +I7430400 +tp157 +I12689 +s(I49075200 +I49161600 +tp158 +I19 +s(I2764800 +I2851200 +tp159 +I76 +s(I2592000 +I2678400 +tp160 +I40 +s(I2073600 +I2160000 +tp161 +I388 +s(I37497600 +I37584000 +tp162 +I11 +s(I1641600 +I1728000 +tp163 +I78 +s(I12873600 +I12960000 +tp164 +I5 +s(I1814400 +I1900800 +tp165 +I1860 +s(I40176000 +I40262400 +tp166 +I1 +s(I3715200 +I3801600 +tp167 +I104 +s(I2332800 +I2419200 +tp168 +I12 +s(I2678400 +I2764800 +tp169 +I278 +s(I12268800 +I12355200 +tp170 +I2 +s(I28771200 +I28857600 +tp171 +I6 +s(I41990400 +I42076800 +tp172 +I10 +sssS'last-complete-bucket' +p173 +NsS'current-cycle' +p174 +Ns. \ No newline at end of file From f81e4e2d25e4d12362f05824075915d52b3878cc Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 13:15:38 -0600 Subject: [PATCH 06/35] refactor to use serializers / pickle->json upgraders --- src/allmydata/storage/crawler.py | 143 +++++++++++++++++++++++-- src/allmydata/storage/expirer.py | 82 +++++++++++--- src/allmydata/storage/server.py | 1 + src/allmydata/test/test_storage_web.py | 10 +- 4 files changed, 212 insertions(+), 24 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index b931f1ab5..48b03ec8b 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -19,12 +19,145 @@ import json import struct from twisted.internet import reactor from twisted.application import service +from twisted.python.filepath import FilePath from allmydata.storage.common import si_b2a from allmydata.util import fileutil class TimeSliceExceeded(Exception): pass + +def _convert_pickle_state_to_json(state): + """ + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list + # ["leases-per-share-histogram"] gets str keys instead of int + # ["cycle-start-finish-times"] from 2-tuple to list + # ["configured-expiration-mode"] from 4-tuple to list + # ["history"] keys are strings + if state["version"] != 1: + raise ValueError( + "Unknown version {version} in pickle state".format(**state) + ) + + def convert_lpsh(value): + return { + str(k): v + for k, v in value.items() + } + + def convert_cem(value): + # original is a 4-tuple, with the last element being a 2-tuple + # .. convert both to lists + return [ + value[0], + value[1], + value[2], + list(value[3]), + ] + + def convert_history(value): + print("convert history") + print(value) + return { + str(k): v + for k, v in value + } + + converters = { + "cycle-to-date": list, + "leases-per-share-histogram": convert_lpsh, + "cycle-starte-finish-times": list, + "configured-expiration-mode": convert_cem, + "history": convert_history, + } + + def convert_value(key, value): + converter = converters.get(key, None) + if converter is None: + return value + return converter(value) + + new_state = { + k: convert_value(k, v) + for k, v in state.items() + } + return new_state + + +def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): + """ + :param FilePath state_path: the filepath to ensure is json + + :param Callable[dict] convert_pickle: function to change + pickle-style state into JSON-style state + + :returns unicode: the local path where the state is stored + + If this state path is JSON, simply return it. + + If this state is pickle, convert to the JSON format and return the + JSON path. + """ + if state_path.path.endswith(".json"): + return state_path.path + + json_state_path = state_path.siblingExtension(".json") + + # if there's no file there at all, we're done because there's + # nothing to upgrade + if not state_path.exists(): + return json_state_path.path + + # upgrade the pickle data to JSON + import pickle + with state_path.open("r") as f: + state = pickle.load(f) + state = convert_pickle(state) + json_state_path = state_path.siblingExtension(".json") + with json_state_path.open("w") as f: + json.dump(state, f) + # we've written the JSON, delete the pickle + state_path.remove() + return json_state_path.path + + +class _LeaseStateSerializer(object): + """ + Read and write state for LeaseCheckingCrawler. This understands + how to read the legacy pickle format files and upgrade them to the + new JSON format (which will occur automatically). + """ + + def __init__(self, state_path): + self._path = FilePath( + _maybe_upgrade_pickle_to_json( + FilePath(state_path), + _convert_pickle_state_to_json, + ) + ) + # XXX want this to .. load and save the state + # - if the state is pickle-only: + # - load it and convert to json format + # - save json + # - delete pickle + # - if the state is json, load it + + def load(self): + with self._path.open("r") as f: + return json.load(f) + + def save(self, data): + tmpfile = self._path.siblingExtension(".tmp") + with tmpfile.open("wb") as f: + json.dump(data, f) + fileutil.move_into_place(tmpfile.path, self._path.path) + return None + + class ShareCrawler(service.MultiService): """A ShareCrawler subclass is attached to a StorageServer, and periodically walks all of its shares, processing each one in some @@ -87,7 +220,7 @@ class ShareCrawler(service.MultiService): self.allowed_cpu_percentage = allowed_cpu_percentage self.server = server self.sharedir = server.sharedir - self.statefile = statefile + self._state_serializer = _LeaseStateSerializer(statefile) self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] if PY3: @@ -210,8 +343,7 @@ class ShareCrawler(service.MultiService): # of the last bucket to be processed, or # None if we are sleeping between cycles try: - with open(self.statefile, "rb") as f: - state = json.load(f) + state = self._state_serializer.load() except Exception: state = {"version": 1, "last-cycle-finished": None, @@ -247,14 +379,11 @@ class ShareCrawler(service.MultiService): else: last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix - tmpfile = self.statefile + ".tmp" # Note: we use self.get_state() here because e.g # LeaseCheckingCrawler stores non-JSON-able state in # self.state() but converts it in self.get_state() - with open(tmpfile, "wb") as f: - json.dump(self.get_state(), f) - fileutil.move_into_place(tmpfile, self.statefile) + self._state_serializer.save(self.get_state()) def startService(self): # arrange things to look like we were just sleeping, so diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 4513dadb2..d2f48004a 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -10,11 +10,72 @@ import json import time import os import struct -from allmydata.storage.crawler import ShareCrawler +from allmydata.storage.crawler import ( + ShareCrawler, + _maybe_upgrade_pickle_to_json, +) from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ UnknownImmutableContainerVersionError from twisted.python import log as twlog +from twisted.python.filepath import FilePath + + +def _convert_pickle_state_to_json(state): + """ + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + print("CONVERT", state) + for k, v in state.items(): + print(k, v) + if state["version"] != 1: + raise ValueError( + "Unknown version {version} in pickle state".format(**state) + ) + + return state + + +class _HistorySerializer(object): + """ + Serialize the 'history' file of the lease-crawler state. This is + "storage/history.state" for the pickle or + "storage/history.state.json" for the new JSON format. + """ + + def __init__(self, history_path): + self._path = FilePath( + _maybe_upgrade_pickle_to_json( + FilePath(history_path), + _convert_pickle_state_to_json, + ) + ) + if not self._path.exists(): + with self._path.open("wb") as f: + json.dump({}, f) + + def read(self): + """ + Deserialize the existing data. + + :return dict: the existing history state + """ + assert self._path is not None, "Not initialized" + with self._path.open("rb") as f: + history = json.load(f) + return history + + def write(self, new_history): + """ + Serialize the existing data as JSON. + """ + assert self._path is not None, "Not initialized" + with self._path.open("wb") as f: + json.dump(new_history, f) + return None + class LeaseCheckingCrawler(ShareCrawler): """I examine the leases on all shares, determining which are still valid @@ -64,7 +125,8 @@ class LeaseCheckingCrawler(ShareCrawler): override_lease_duration, # used if expiration_mode=="age" cutoff_date, # used if expiration_mode=="cutoff-date" sharetypes): - self.historyfile = historyfile + self._history_serializer = _HistorySerializer(historyfile) + ##self.historyfile = historyfile self.expiration_enabled = expiration_enabled self.mode = mode self.override_lease_duration = None @@ -92,12 +154,6 @@ class LeaseCheckingCrawler(ShareCrawler): for k in so_far: self.state["cycle-to-date"].setdefault(k, so_far[k]) - # initialize history - if not os.path.exists(self.historyfile): - history = {} # cyclenum -> dict - with open(self.historyfile, "wb") as f: - json.dump(history, f) - def create_empty_cycle_dict(self): recovered = self.create_empty_recovered_dict() so_far = {"corrupt-shares": [], @@ -315,14 +371,12 @@ class LeaseCheckingCrawler(ShareCrawler): # copy() needs to become a deepcopy h["space-recovered"] = s["space-recovered"].copy() - with open(self.historyfile, "rb") as f: - history = json.load(f) + history = self._history_serializer.read() history[str(cycle)] = h while len(history) > 10: oldcycles = sorted(int(k) for k in history.keys()) del history[str(oldcycles[0])] - with open(self.historyfile, "wb") as f: - json.dump(history, f) + self._history_serializer.write(history) def get_state(self): """In addition to the crawler state described in @@ -391,9 +445,7 @@ class LeaseCheckingCrawler(ShareCrawler): progress = self.get_progress() state = ShareCrawler.get_state(self) # does a shallow copy - with open(self.historyfile, "rb") as f: - history = json.load(f) - state["history"] = history + state["history"] = self._history_serializer.read() if not progress["cycle-in-progress"]: del state["cycle-to-date"] diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 49cb7fa82..9211535b7 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -57,6 +57,7 @@ DEFAULT_RENEWAL_TIME = 31 * 24 * 60 * 60 @implementer(RIStorageServer, IStatsProducer) class StorageServer(service.MultiService, Referenceable): name = 'storage' + # only the tests change this to anything else LeaseCheckerClass = LeaseCheckingCrawler def __init__(self, storedir, nodeid, reserved_space=0, diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index b9fa548d3..d91242449 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -25,14 +25,20 @@ from twisted.trial import unittest from twisted.internet import defer from twisted.application import service from twisted.web.template import flattenString +from twisted.python.filepath import FilePath from foolscap.api import fireEventually from allmydata.util import fileutil, hashutil, base32, pollmixin from allmydata.storage.common import storage_index_to_dir, \ UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError from allmydata.storage.server import StorageServer -from allmydata.storage.crawler import BucketCountingCrawler -from allmydata.storage.expirer import LeaseCheckingCrawler +from allmydata.storage.crawler import ( + BucketCountingCrawler, + _LeaseStateSerializer, +) +from allmydata.storage.expirer import ( + LeaseCheckingCrawler, +) from allmydata.web.storage import ( StorageStatus, StorageStatusElement, From bf5e682d71e086f351126129c2e586a80442c2bb Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 13:17:46 -0600 Subject: [PATCH 07/35] test upgrade of main state works --- src/allmydata/test/test_storage_web.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index d91242449..0b287d667 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1145,6 +1145,22 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): d.addBoth(_cleanup) return d + def test_deserialize_pickle(self): + """ + The crawler can read existing state from the old pickle format + """ + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state") + test_pickle = FilePath("lease_checker.state") + with test_pickle.open("w") as local, original_pickle.open("r") as remote: + local.write(remote.read()) + + serial = _LeaseStateSerializer(test_pickle.path) + + # the (existing) state file should have been upgraded to JSON + self.assertNot(test_pickle.exists()) + self.assertTrue(test_pickle.siblingExtension(".json").exists()) + + class WebStatus(unittest.TestCase, pollmixin.PollMixin): From 89c2aacadca5cc7ba13f4afda2c4d3817ea9b5c0 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 15:52:01 -0600 Subject: [PATCH 08/35] working test of 'in the wild' data, working converters --- src/allmydata/storage/crawler.py | 36 +++--- src/allmydata/test/test_storage_web.py | 165 +++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 17 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 48b03ec8b..548864e06 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -34,7 +34,7 @@ def _convert_pickle_state_to_json(state): :return dict: the state in the JSON form """ # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list - # ["leases-per-share-histogram"] gets str keys instead of int + # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int # ["cycle-start-finish-times"] from 2-tuple to list # ["configured-expiration-mode"] from 4-tuple to list # ["history"] keys are strings @@ -43,12 +43,6 @@ def _convert_pickle_state_to_json(state): "Unknown version {version} in pickle state".format(**state) ) - def convert_lpsh(value): - return { - str(k): v - for k, v in value.items() - } - def convert_cem(value): # original is a 4-tuple, with the last element being a 2-tuple # .. convert both to lists @@ -59,20 +53,28 @@ def _convert_pickle_state_to_json(state): list(value[3]), ] - def convert_history(value): - print("convert history") - print(value) + def convert_ctd(value): + ctd_converter = { + "lease-age-histogram": lambda value: { + "{},{}".format(k[0], k[1]): v + for k, v in value.items() + }, + "corrupt-shares": lambda value: [ + list(x) + for x in value + ], + } return { - str(k): v - for k, v in value + k: ctd_converter.get(k, lambda z: z)(v) + for k, v in value.items() } + # we don't convert "history" here because that's in a separate + # file; see expirer.py converters = { - "cycle-to-date": list, - "leases-per-share-histogram": convert_lpsh, + "cycle-to-date": convert_ctd, "cycle-starte-finish-times": list, "configured-expiration-mode": convert_cem, - "history": convert_history, } def convert_value(key, value): @@ -116,10 +118,10 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): import pickle with state_path.open("r") as f: state = pickle.load(f) - state = convert_pickle(state) + new_state = convert_pickle(state) json_state_path = state_path.siblingExtension(".json") with json_state_path.open("w") as f: - json.dump(state, f) + json.dump(new_state, f) # we've written the JSON, delete the pickle state_path.remove() return json_state_path.path diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 0b287d667..5cdf02a25 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1160,6 +1160,171 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): self.assertNot(test_pickle.exists()) self.assertTrue(test_pickle.siblingExtension(".json").exists()) + self.assertEqual( + serial.load(), + { + u'last-complete-prefix': None, + u'version': 1, + u'current-cycle-start-time': 1635003106.611748, + u'last-cycle-finished': 312, + u'cycle-to-date': { + u'leases-per-share-histogram': { + u'1': 36793, + u'2': 1, + }, + u'space-recovered': { + u'examined-buckets-immutable': 17183, + u'configured-buckets-mutable': 0, + u'examined-shares-mutable': 1796, + u'original-shares-mutable': 1563, + u'configured-buckets-immutable': 0, + u'original-shares-immutable': 27926, + u'original-diskbytes-immutable': 431149056, + u'examined-shares-immutable': 34998, + u'original-buckets': 14661, + u'actual-shares-immutable': 0, + u'configured-shares': 0, + u'original-buckets-mutable': 899, + u'actual-diskbytes': 4096, + u'actual-shares-mutable': 0, + u'configured-buckets': 1, + u'examined-buckets-unknown': 14, + u'actual-sharebytes': 0, + u'original-shares': 29489, + u'actual-buckets-immutable': 0, + u'original-sharebytes': 312664812, + u'examined-sharebytes-immutable': 383801602, + u'actual-shares': 0, + u'actual-sharebytes-immutable': 0, + u'original-diskbytes': 441643008, + u'configured-diskbytes-mutable': 0, + u'configured-sharebytes-immutable': 0, + u'configured-shares-mutable': 0, + u'actual-diskbytes-immutable': 0, + u'configured-diskbytes-immutable': 0, + u'original-diskbytes-mutable': 10489856, + u'actual-sharebytes-mutable': 0, + u'configured-sharebytes': 0, + u'examined-shares': 36794, + u'actual-diskbytes-mutable': 0, + u'actual-buckets': 1, + u'original-buckets-immutable': 13761, + u'configured-sharebytes-mutable': 0, + u'examined-sharebytes': 390369660, + u'original-sharebytes-immutable': 308125753, + u'original-sharebytes-mutable': 4539059, + u'actual-buckets-mutable': 0, + u'examined-buckets-mutable': 1043, + u'configured-shares-immutable': 0, + u'examined-diskbytes': 476598272, + u'examined-diskbytes-mutable': 9154560, + u'examined-sharebytes-mutable': 6568058, + u'examined-buckets': 18241, + u'configured-diskbytes': 4096, + u'examined-diskbytes-immutable': 467443712}, + u'corrupt-shares': [ + [u'2dn6xnlnsqwtnapwxfdivpm3s4', 4], + [u'2dn6xnlnsqwtnapwxfdivpm3s4', 1], + [u'2rrzthwsrrxolevmwdvbdy3rqi', 4], + [u'2rrzthwsrrxolevmwdvbdy3rqi', 1], + [u'2skfngcto6h7eqmn4uo7ntk3ne', 4], + [u'2skfngcto6h7eqmn4uo7ntk3ne', 1], + [u'32d5swqpqx2mwix7xmqzvhdwje', 4], + [u'32d5swqpqx2mwix7xmqzvhdwje', 1], + [u'5mmayp66yflmpon3o6unsnbaca', 4], + [u'5mmayp66yflmpon3o6unsnbaca', 1], + [u'6ixhpvbtre7fnrl6pehlrlflc4', 4], + [u'6ixhpvbtre7fnrl6pehlrlflc4', 1], + [u'ewzhvswjsz4vp2bqkb6mi3bz2u', 4], + [u'ewzhvswjsz4vp2bqkb6mi3bz2u', 1], + [u'fu7pazf6ogavkqj6z4q5qqex3u', 4], + [u'fu7pazf6ogavkqj6z4q5qqex3u', 1], + [u'hbyjtqvpcimwxiyqbcbbdn2i4a', 4], + [u'hbyjtqvpcimwxiyqbcbbdn2i4a', 1], + [u'pmcjbdkbjdl26k3e6yja77femq', 4], + [u'pmcjbdkbjdl26k3e6yja77femq', 1], + [u'r6swof4v2uttbiiqwj5pi32cm4', 4], + [u'r6swof4v2uttbiiqwj5pi32cm4', 1], + [u't45v5akoktf53evc2fi6gwnv6y', 4], + [u't45v5akoktf53evc2fi6gwnv6y', 1], + [u'y6zb4faar3rdvn3e6pfg4wlotm', 4], + [u'y6zb4faar3rdvn3e6pfg4wlotm', 1], + [u'z3yghutvqoqbchjao4lndnrh3a', 4], + [u'z3yghutvqoqbchjao4lndnrh3a', 1], + ], + u'lease-age-histogram': { + "1641600,1728000": 78, + "12441600,12528000": 78, + "8640000,8726400": 32, + "1814400,1900800": 1860, + "2764800,2851200": 76, + "11491200,11577600": 20, + "10713600,10800000": 183, + "47865600,47952000": 7, + "3110400,3196800": 328, + "10627200,10713600": 43, + "45619200,45705600": 4, + "12873600,12960000": 5, + "7430400,7516800": 7228, + "1555200,1641600": 492, + "38880000,38966400": 3, + "12528000,12614400": 193, + "7344000,7430400": 12689, + "2678400,2764800": 278, + "2332800,2419200": 12, + "9244800,9331200": 73, + "12787200,12873600": 218, + "49075200,49161600": 19, + "10368000,10454400": 117, + "4665600,4752000": 256, + "7516800,7603200": 993, + "42336000,42422400": 33, + "10972800,11059200": 122, + "39052800,39139200": 51, + "12614400,12700800": 210, + "7603200,7689600": 2004, + "10540800,10627200": 16, + "950400,1036800": 4435, + "42076800,42163200": 4, + "8812800,8899200": 57, + "5788800,5875200": 954, + "36374400,36460800": 3, + "9331200,9417600": 12, + "30499200,30585600": 5, + "12700800,12787200": 25, + "2073600,2160000": 388, + "12960000,13046400": 8, + "11923200,12009600": 89, + "3369600,3456000": 79, + "3196800,3283200": 628, + "37497600,37584000": 11, + "33436800,33523200": 7, + "44928000,45014400": 2, + "37929600,38016000": 3, + "38966400,39052800": 61, + "3283200,3369600": 86, + "11750400,11836800": 7, + "3801600,3888000": 32, + "46310400,46396800": 1, + "4838400,4924800": 386, + "8208000,8294400": 38, + "37411200,37497600": 4, + "12009600,12096000": 329, + "10454400,10540800": 1239, + "40176000,40262400": 1, + "3715200,3801600": 104, + "44409600,44496000": 13, + "38361600,38448000": 5, + "12268800,12355200": 2, + "28771200,28857600": 6, + "41990400,42076800": 10, + "2592000,2678400": 40, + }, + }, + 'current-cycle': None, + 'last-complete-bucket': None, + } + ) class WebStatus(unittest.TestCase, pollmixin.PollMixin): From d4fc14f9ada372e94d68667da64643b52de9923c Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 19:42:08 -0600 Subject: [PATCH 09/35] docstring --- src/allmydata/storage/expirer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index d2f48004a..ce126b6a4 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -23,6 +23,9 @@ from twisted.python.filepath import FilePath def _convert_pickle_state_to_json(state): """ + Convert a pickle-serialized crawler-history state to the new JSON + format. + :param dict state: the pickled state :return dict: the state in the JSON form From 75410e51f04f90007efce9fa2cba6504c54fe9ac Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:10:43 -0600 Subject: [PATCH 10/35] refactor --- src/allmydata/storage/crawler.py | 86 ++++++++++++++++++-------------- src/allmydata/storage/expirer.py | 14 ++---- 2 files changed, 53 insertions(+), 47 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 548864e06..2e9bafd13 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -27,23 +27,14 @@ class TimeSliceExceeded(Exception): pass -def _convert_pickle_state_to_json(state): +def _convert_cycle_data(state): """ - :param dict state: the pickled state + :param dict state: cycle-to-date or history-item state :return dict: the state in the JSON form """ - # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list - # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int - # ["cycle-start-finish-times"] from 2-tuple to list - # ["configured-expiration-mode"] from 4-tuple to list - # ["history"] keys are strings - if state["version"] != 1: - raise ValueError( - "Unknown version {version} in pickle state".format(**state) - ) - def convert_cem(value): + def _convert_expiration_mode(value): # original is a 4-tuple, with the last element being a 2-tuple # .. convert both to lists return [ @@ -53,41 +44,60 @@ def _convert_pickle_state_to_json(state): list(value[3]), ] - def convert_ctd(value): - ctd_converter = { - "lease-age-histogram": lambda value: { + def _convert_lease_age(value): + # if we're in cycle-to-date, this is a dict + if isinstance(value, dict): + return { "{},{}".format(k[0], k[1]): v for k, v in value.items() - }, - "corrupt-shares": lambda value: [ - list(x) - for x in value - ], - } - return { - k: ctd_converter.get(k, lambda z: z)(v) - for k, v in value.items() - } + } + # otherwise, it's a history-item and they're 3-tuples + return [ + list(v) + for v in value + ] - # we don't convert "history" here because that's in a separate - # file; see expirer.py converters = { - "cycle-to-date": convert_ctd, - "cycle-starte-finish-times": list, - "configured-expiration-mode": convert_cem, + "configured-expiration-mode": _convert_expiration_mode, + "cycle-start-finish-times": list, + "lease-age-histogram": _convert_lease_age, + "corrupt-shares": lambda value: [ + list(x) + for x in value + ], + "leases-per-share-histogram": lambda value: { + str(k): v + for k, v in value.items() + }, + } + return { + k: converters.get(k, lambda z: z)(v) + for k, v in state.items() } - def convert_value(key, value): - converter = converters.get(key, None) - if converter is None: - return value - return converter(value) - new_state = { - k: convert_value(k, v) +def _convert_pickle_state_to_json(state): + """ + :param dict state: the pickled state + + :return dict: the state in the JSON form + """ + # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list + # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int + # ["cycle-start-finish-times"] from 2-tuple to list + # ["history"] keys are strings + if state["version"] != 1: + raise ValueError( + "Unknown version {version} in pickle state".format(**state) + ) + + converters = { + "cycle-to-date": _convert_cycle_data, + } + return { + k: converters.get(k, lambda x: x)(v) for k, v in state.items() } - return new_state def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index ce126b6a4..946498eaf 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -13,6 +13,7 @@ import struct from allmydata.storage.crawler import ( ShareCrawler, _maybe_upgrade_pickle_to_json, + _convert_cycle_data, ) from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ @@ -30,15 +31,10 @@ def _convert_pickle_state_to_json(state): :return dict: the state in the JSON form """ - print("CONVERT", state) - for k, v in state.items(): - print(k, v) - if state["version"] != 1: - raise ValueError( - "Unknown version {version} in pickle state".format(**state) - ) - - return state + return { + str(k): _convert_cycle_data(v) + for k, v in state.items() + } class _HistorySerializer(object): From a867294e00addbf4a0f4426820beb07895b81441 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:12:17 -0600 Subject: [PATCH 11/35] dead --- src/allmydata/storage/expirer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 946498eaf..254264e38 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -61,7 +61,6 @@ class _HistorySerializer(object): :return dict: the existing history state """ - assert self._path is not None, "Not initialized" with self._path.open("rb") as f: history = json.load(f) return history @@ -70,7 +69,6 @@ class _HistorySerializer(object): """ Serialize the existing data as JSON. """ - assert self._path is not None, "Not initialized" with self._path.open("wb") as f: json.dump(new_history, f) return None From 94670461f1d93bef6766652a03a2b9bd85916224 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:37:51 -0600 Subject: [PATCH 12/35] tests --- src/allmydata/storage/expirer.py | 10 +- src/allmydata/test/test_storage_web.py | 168 +++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 254264e38..9ba71539c 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -55,7 +55,7 @@ class _HistorySerializer(object): with self._path.open("wb") as f: json.dump({}, f) - def read(self): + def load(self): """ Deserialize the existing data. @@ -65,7 +65,7 @@ class _HistorySerializer(object): history = json.load(f) return history - def write(self, new_history): + def save(self, new_history): """ Serialize the existing data as JSON. """ @@ -368,12 +368,12 @@ class LeaseCheckingCrawler(ShareCrawler): # copy() needs to become a deepcopy h["space-recovered"] = s["space-recovered"].copy() - history = self._history_serializer.read() + history = self._history_serializer.load() history[str(cycle)] = h while len(history) > 10: oldcycles = sorted(int(k) for k in history.keys()) del history[str(oldcycles[0])] - self._history_serializer.write(history) + self._history_serializer.save(history) def get_state(self): """In addition to the crawler state described in @@ -442,7 +442,7 @@ class LeaseCheckingCrawler(ShareCrawler): progress = self.get_progress() state = ShareCrawler.get_state(self) # does a shallow copy - state["history"] = self._history_serializer.read() + state["history"] = self._history_serializer.load() if not progress["cycle-in-progress"]: del state["cycle-to-date"] diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 5cdf02a25..033462d46 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -38,6 +38,7 @@ from allmydata.storage.crawler import ( ) from allmydata.storage.expirer import ( LeaseCheckingCrawler, + _HistorySerializer, ) from allmydata.web.storage import ( StorageStatus, @@ -1149,6 +1150,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): """ The crawler can read existing state from the old pickle format """ + # this file came from an "in the wild" tahoe version 1.16.0 original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state") test_pickle = FilePath("lease_checker.state") with test_pickle.open("w") as local, original_pickle.open("r") as remote: @@ -1326,6 +1328,172 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): } ) + def test_deserialize_history_pickle(self): + """ + The crawler can read existing history state from the old pickle + format + """ + # this file came from an "in the wild" tahoe version 1.16.0 + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history") + test_pickle = FilePath("lease_checker.history") + with test_pickle.open("w") as local, original_pickle.open("r") as remote: + local.write(remote.read()) + + serial = _HistorySerializer(test_pickle.path) + + self.maxDiff = None + self.assertEqual( + serial.load(), + { + "363": { + 'configured-expiration-mode': ['age', None, None, ['immutable', 'mutable']], + 'expiration-enabled': False, + 'leases-per-share-histogram': { + '1': 39774, + }, + 'lease-age-histogram': [ + [0, 86400, 3125], + [345600, 432000, 4175], + [950400, 1036800, 141], + [1036800, 1123200, 345], + [1123200, 1209600, 81], + [1296000, 1382400, 1832], + [1555200, 1641600, 390], + [1728000, 1814400, 12], + [2073600, 2160000, 84], + [2160000, 2246400, 228], + [2246400, 2332800, 75], + [2592000, 2678400, 644], + [2678400, 2764800, 273], + [2764800, 2851200, 94], + [2851200, 2937600, 97], + [3196800, 3283200, 143], + [3283200, 3369600, 48], + [4147200, 4233600, 374], + [4320000, 4406400, 534], + [5270400, 5356800, 1005], + [6739200, 6825600, 8704], + [6825600, 6912000, 3986], + [6912000, 6998400, 7592], + [6998400, 7084800, 2607], + [7689600, 7776000, 35], + [8035200, 8121600, 33], + [8294400, 8380800, 54], + [8640000, 8726400, 45], + [8726400, 8812800, 27], + [8812800, 8899200, 12], + [9763200, 9849600, 77], + [9849600, 9936000, 91], + [9936000, 10022400, 1210], + [10022400, 10108800, 45], + [10108800, 10195200, 186], + [10368000, 10454400, 113], + [10972800, 11059200, 21], + [11232000, 11318400, 5], + [11318400, 11404800, 19], + [11404800, 11491200, 238], + [11491200, 11577600, 159], + [11750400, 11836800, 1], + [11836800, 11923200, 32], + [11923200, 12009600, 192], + [12009600, 12096000, 222], + [12096000, 12182400, 18], + [12182400, 12268800, 224], + [12268800, 12355200, 9], + [12355200, 12441600, 9], + [12441600, 12528000, 10], + [12528000, 12614400, 6], + [12614400, 12700800, 6], + [12700800, 12787200, 18], + [12787200, 12873600, 6], + [12873600, 12960000, 62], + ], + 'cycle-start-finish-times': [1634446505.241972, 1634446666.055401], + 'space-recovered': { + 'examined-buckets-immutable': 17896, + 'configured-buckets-mutable': 0, + 'examined-shares-mutable': 2473, + 'original-shares-mutable': 1185, + 'configured-buckets-immutable': 0, + 'original-shares-immutable': 27457, + 'original-diskbytes-immutable': 2810982400, + 'examined-shares-immutable': 37301, + 'original-buckets': 14047, + 'actual-shares-immutable': 0, + 'configured-shares': 0, + 'original-buckets-mutable': 691, + 'actual-diskbytes': 4096, + 'actual-shares-mutable': 0, + 'configured-buckets': 1, + 'examined-buckets-unknown': 14, + 'actual-sharebytes': 0, + 'original-shares': 28642, + 'actual-buckets-immutable': 0, + 'original-sharebytes': 2695552941, + 'examined-sharebytes-immutable': 2754798505, + 'actual-shares': 0, + 'actual-sharebytes-immutable': 0, + 'original-diskbytes': 2818981888, + 'configured-diskbytes-mutable': 0, + 'configured-sharebytes-immutable': 0, + 'configured-shares-mutable': 0, + 'actual-diskbytes-immutable': 0, + 'configured-diskbytes-immutable': 0, + 'original-diskbytes-mutable': 7995392, + 'actual-sharebytes-mutable': 0, + 'configured-sharebytes': 0, + 'examined-shares': 39774, + 'actual-diskbytes-mutable': 0, + 'actual-buckets': 1, + 'original-buckets-immutable': 13355, + 'configured-sharebytes-mutable': 0, + 'examined-sharebytes': 2763646972, + 'original-sharebytes-immutable': 2692076909, + 'original-sharebytes-mutable': 3476032, + 'actual-buckets-mutable': 0, + 'examined-buckets-mutable': 1286, + 'configured-shares-immutable': 0, + 'examined-diskbytes': 2854801408, + 'examined-diskbytes-mutable': 12161024, + 'examined-sharebytes-mutable': 8848467, + 'examined-buckets': 19197, + 'configured-diskbytes': 4096, + 'examined-diskbytes-immutable': 2842640384 + }, + 'corrupt-shares': [ + ['2dn6xnlnsqwtnapwxfdivpm3s4', 3], + ['2dn6xnlnsqwtnapwxfdivpm3s4', 0], + ['2rrzthwsrrxolevmwdvbdy3rqi', 3], + ['2rrzthwsrrxolevmwdvbdy3rqi', 0], + ['2skfngcto6h7eqmn4uo7ntk3ne', 3], + ['2skfngcto6h7eqmn4uo7ntk3ne', 0], + ['32d5swqpqx2mwix7xmqzvhdwje', 3], + ['32d5swqpqx2mwix7xmqzvhdwje', 0], + ['5mmayp66yflmpon3o6unsnbaca', 3], + ['5mmayp66yflmpon3o6unsnbaca', 0], + ['6ixhpvbtre7fnrl6pehlrlflc4', 3], + ['6ixhpvbtre7fnrl6pehlrlflc4', 0], + ['ewzhvswjsz4vp2bqkb6mi3bz2u', 3], + ['ewzhvswjsz4vp2bqkb6mi3bz2u', 0], + ['fu7pazf6ogavkqj6z4q5qqex3u', 3], + ['fu7pazf6ogavkqj6z4q5qqex3u', 0], + ['hbyjtqvpcimwxiyqbcbbdn2i4a', 3], + ['hbyjtqvpcimwxiyqbcbbdn2i4a', 0], + ['pmcjbdkbjdl26k3e6yja77femq', 3], + ['pmcjbdkbjdl26k3e6yja77femq', 0], + ['r6swof4v2uttbiiqwj5pi32cm4', 3], + ['r6swof4v2uttbiiqwj5pi32cm4', 0], + ['t45v5akoktf53evc2fi6gwnv6y', 3], + ['t45v5akoktf53evc2fi6gwnv6y', 0], + ['y6zb4faar3rdvn3e6pfg4wlotm', 3], + ['y6zb4faar3rdvn3e6pfg4wlotm', 0], + ['z3yghutvqoqbchjao4lndnrh3a', 3], + ['z3yghutvqoqbchjao4lndnrh3a', 0], + ] + } + } + ) + class WebStatus(unittest.TestCase, pollmixin.PollMixin): From 069c332a6815c6c67b77a7af328e7bb2993d175d Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:49:25 -0600 Subject: [PATCH 13/35] straight assert --- src/allmydata/storage/crawler.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 2e9bafd13..d1366765e 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -86,10 +86,7 @@ def _convert_pickle_state_to_json(state): # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int # ["cycle-start-finish-times"] from 2-tuple to list # ["history"] keys are strings - if state["version"] != 1: - raise ValueError( - "Unknown version {version} in pickle state".format(**state) - ) + assert state["version"] == 1, "Only known version is 1" converters = { "cycle-to-date": _convert_cycle_data, From 9b3c55e4aa856e48b6d6fb5ee0252d69aeb64110 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 21:59:29 -0600 Subject: [PATCH 14/35] test a second deserialzation --- src/allmydata/test/test_storage_web.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 033462d46..70866cba9 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1327,6 +1327,11 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): 'last-complete-bucket': None, } ) + second_serial = _LeaseStateSerializer(serial._path.path) + self.assertEqual( + serial.load(), + second_serial.load(), + ) def test_deserialize_history_pickle(self): """ From 4f64bbaa0086af61745f7f55fd04fb716f018960 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 25 Oct 2021 22:15:49 -0600 Subject: [PATCH 15/35] data --- src/allmydata/test/data/lease_checker.history | 501 ++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 src/allmydata/test/data/lease_checker.history diff --git a/src/allmydata/test/data/lease_checker.history b/src/allmydata/test/data/lease_checker.history new file mode 100644 index 000000000..0c27a5ad0 --- /dev/null +++ b/src/allmydata/test/data/lease_checker.history @@ -0,0 +1,501 @@ +(dp0 +I363 +(dp1 +Vconfigured-expiration-mode +p2 +(S'age' +p3 +NN(S'immutable' +p4 +S'mutable' +p5 +tp6 +tp7 +sVexpiration-enabled +p8 +I00 +sVleases-per-share-histogram +p9 +(dp10 +I1 +I39774 +ssVlease-age-histogram +p11 +(lp12 +(I0 +I86400 +I3125 +tp13 +a(I345600 +I432000 +I4175 +tp14 +a(I950400 +I1036800 +I141 +tp15 +a(I1036800 +I1123200 +I345 +tp16 +a(I1123200 +I1209600 +I81 +tp17 +a(I1296000 +I1382400 +I1832 +tp18 +a(I1555200 +I1641600 +I390 +tp19 +a(I1728000 +I1814400 +I12 +tp20 +a(I2073600 +I2160000 +I84 +tp21 +a(I2160000 +I2246400 +I228 +tp22 +a(I2246400 +I2332800 +I75 +tp23 +a(I2592000 +I2678400 +I644 +tp24 +a(I2678400 +I2764800 +I273 +tp25 +a(I2764800 +I2851200 +I94 +tp26 +a(I2851200 +I2937600 +I97 +tp27 +a(I3196800 +I3283200 +I143 +tp28 +a(I3283200 +I3369600 +I48 +tp29 +a(I4147200 +I4233600 +I374 +tp30 +a(I4320000 +I4406400 +I534 +tp31 +a(I5270400 +I5356800 +I1005 +tp32 +a(I6739200 +I6825600 +I8704 +tp33 +a(I6825600 +I6912000 +I3986 +tp34 +a(I6912000 +I6998400 +I7592 +tp35 +a(I6998400 +I7084800 +I2607 +tp36 +a(I7689600 +I7776000 +I35 +tp37 +a(I8035200 +I8121600 +I33 +tp38 +a(I8294400 +I8380800 +I54 +tp39 +a(I8640000 +I8726400 +I45 +tp40 +a(I8726400 +I8812800 +I27 +tp41 +a(I8812800 +I8899200 +I12 +tp42 +a(I9763200 +I9849600 +I77 +tp43 +a(I9849600 +I9936000 +I91 +tp44 +a(I9936000 +I10022400 +I1210 +tp45 +a(I10022400 +I10108800 +I45 +tp46 +a(I10108800 +I10195200 +I186 +tp47 +a(I10368000 +I10454400 +I113 +tp48 +a(I10972800 +I11059200 +I21 +tp49 +a(I11232000 +I11318400 +I5 +tp50 +a(I11318400 +I11404800 +I19 +tp51 +a(I11404800 +I11491200 +I238 +tp52 +a(I11491200 +I11577600 +I159 +tp53 +a(I11750400 +I11836800 +I1 +tp54 +a(I11836800 +I11923200 +I32 +tp55 +a(I11923200 +I12009600 +I192 +tp56 +a(I12009600 +I12096000 +I222 +tp57 +a(I12096000 +I12182400 +I18 +tp58 +a(I12182400 +I12268800 +I224 +tp59 +a(I12268800 +I12355200 +I9 +tp60 +a(I12355200 +I12441600 +I9 +tp61 +a(I12441600 +I12528000 +I10 +tp62 +a(I12528000 +I12614400 +I6 +tp63 +a(I12614400 +I12700800 +I6 +tp64 +a(I12700800 +I12787200 +I18 +tp65 +a(I12787200 +I12873600 +I6 +tp66 +a(I12873600 +I12960000 +I62 +tp67 +asVcycle-start-finish-times +p68 +(F1634446505.241972 +F1634446666.055401 +tp69 +sVspace-recovered +p70 +(dp71 +Vexamined-buckets-immutable +p72 +I17896 +sVconfigured-buckets-mutable +p73 +I0 +sVexamined-shares-mutable +p74 +I2473 +sVoriginal-shares-mutable +p75 +I1185 +sVconfigured-buckets-immutable +p76 +I0 +sVoriginal-shares-immutable +p77 +I27457 +sVoriginal-diskbytes-immutable +p78 +I2810982400 +sVexamined-shares-immutable +p79 +I37301 +sVoriginal-buckets +p80 +I14047 +sVactual-shares-immutable +p81 +I0 +sVconfigured-shares +p82 +I0 +sVoriginal-buckets-mutable +p83 +I691 +sVactual-diskbytes +p84 +I4096 +sVactual-shares-mutable +p85 +I0 +sVconfigured-buckets +p86 +I1 +sVexamined-buckets-unknown +p87 +I14 +sVactual-sharebytes +p88 +I0 +sVoriginal-shares +p89 +I28642 +sVactual-buckets-immutable +p90 +I0 +sVoriginal-sharebytes +p91 +I2695552941 +sVexamined-sharebytes-immutable +p92 +I2754798505 +sVactual-shares +p93 +I0 +sVactual-sharebytes-immutable +p94 +I0 +sVoriginal-diskbytes +p95 +I2818981888 +sVconfigured-diskbytes-mutable +p96 +I0 +sVconfigured-sharebytes-immutable +p97 +I0 +sVconfigured-shares-mutable +p98 +I0 +sVactual-diskbytes-immutable +p99 +I0 +sVconfigured-diskbytes-immutable +p100 +I0 +sVoriginal-diskbytes-mutable +p101 +I7995392 +sVactual-sharebytes-mutable +p102 +I0 +sVconfigured-sharebytes +p103 +I0 +sVexamined-shares +p104 +I39774 +sVactual-diskbytes-mutable +p105 +I0 +sVactual-buckets +p106 +I1 +sVoriginal-buckets-immutable +p107 +I13355 +sVconfigured-sharebytes-mutable +p108 +I0 +sVexamined-sharebytes +p109 +I2763646972 +sVoriginal-sharebytes-immutable +p110 +I2692076909 +sVoriginal-sharebytes-mutable +p111 +I3476032 +sVactual-buckets-mutable +p112 +I0 +sVexamined-buckets-mutable +p113 +I1286 +sVconfigured-shares-immutable +p114 +I0 +sVexamined-diskbytes +p115 +I2854801408 +sVexamined-diskbytes-mutable +p116 +I12161024 +sVexamined-sharebytes-mutable +p117 +I8848467 +sVexamined-buckets +p118 +I19197 +sVconfigured-diskbytes +p119 +I4096 +sVexamined-diskbytes-immutable +p120 +I2842640384 +ssVcorrupt-shares +p121 +(lp122 +(V2dn6xnlnsqwtnapwxfdivpm3s4 +p123 +I3 +tp124 +a(g123 +I0 +tp125 +a(V2rrzthwsrrxolevmwdvbdy3rqi +p126 +I3 +tp127 +a(g126 +I0 +tp128 +a(V2skfngcto6h7eqmn4uo7ntk3ne +p129 +I3 +tp130 +a(g129 +I0 +tp131 +a(V32d5swqpqx2mwix7xmqzvhdwje +p132 +I3 +tp133 +a(g132 +I0 +tp134 +a(V5mmayp66yflmpon3o6unsnbaca +p135 +I3 +tp136 +a(g135 +I0 +tp137 +a(V6ixhpvbtre7fnrl6pehlrlflc4 +p138 +I3 +tp139 +a(g138 +I0 +tp140 +a(Vewzhvswjsz4vp2bqkb6mi3bz2u +p141 +I3 +tp142 +a(g141 +I0 +tp143 +a(Vfu7pazf6ogavkqj6z4q5qqex3u +p144 +I3 +tp145 +a(g144 +I0 +tp146 +a(Vhbyjtqvpcimwxiyqbcbbdn2i4a +p147 +I3 +tp148 +a(g147 +I0 +tp149 +a(Vpmcjbdkbjdl26k3e6yja77femq +p150 +I3 +tp151 +a(g150 +I0 +tp152 +a(Vr6swof4v2uttbiiqwj5pi32cm4 +p153 +I3 +tp154 +a(g153 +I0 +tp155 +a(Vt45v5akoktf53evc2fi6gwnv6y +p156 +I3 +tp157 +a(g156 +I0 +tp158 +a(Vy6zb4faar3rdvn3e6pfg4wlotm +p159 +I3 +tp160 +a(g159 +I0 +tp161 +a(Vz3yghutvqoqbchjao4lndnrh3a +p162 +I3 +tp163 +a(g162 +I0 +tp164 +ass. \ No newline at end of file From 1c93175583365966f0b476ecc95720efcbf2f827 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 2 Nov 2021 22:42:33 -0600 Subject: [PATCH 16/35] cleanup --- src/allmydata/storage/crawler.py | 22 ++++------------------ src/allmydata/storage/expirer.py | 1 - 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index d1366765e..a06806d17 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -82,10 +82,6 @@ def _convert_pickle_state_to_json(state): :return dict: the state in the JSON form """ - # ["cycle-to-date"]["corrupt-shares"] from 2-tuple to list - # ["cycle-to-date"]["leases-per-share-histogram"] gets str keys instead of int - # ["cycle-start-finish-times"] from 2-tuple to list - # ["history"] keys are strings assert state["version"] == 1, "Only known version is 1" converters = { @@ -123,12 +119,12 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): # upgrade the pickle data to JSON import pickle - with state_path.open("r") as f: + with state_path.open("rb") as f: state = pickle.load(f) new_state = convert_pickle(state) - json_state_path = state_path.siblingExtension(".json") - with json_state_path.open("w") as f: + with json_state_path.open("wb") as f: json.dump(new_state, f) + # we've written the JSON, delete the pickle state_path.remove() return json_state_path.path @@ -148,15 +144,9 @@ class _LeaseStateSerializer(object): _convert_pickle_state_to_json, ) ) - # XXX want this to .. load and save the state - # - if the state is pickle-only: - # - load it and convert to json format - # - save json - # - delete pickle - # - if the state is json, load it def load(self): - with self._path.open("r") as f: + with self._path.open("rb") as f: return json.load(f) def save(self, data): @@ -388,10 +378,6 @@ class ShareCrawler(service.MultiService): else: last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix - - # Note: we use self.get_state() here because e.g - # LeaseCheckingCrawler stores non-JSON-able state in - # self.state() but converts it in self.get_state() self._state_serializer.save(self.get_state()) def startService(self): diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index 9ba71539c..ad1343ef5 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -123,7 +123,6 @@ class LeaseCheckingCrawler(ShareCrawler): cutoff_date, # used if expiration_mode=="cutoff-date" sharetypes): self._history_serializer = _HistorySerializer(historyfile) - ##self.historyfile = historyfile self.expiration_enabled = expiration_enabled self.mode = mode self.override_lease_duration = None From 23ff1b2430334d376abd426421039562f94bcfc8 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 2 Nov 2021 22:45:08 -0600 Subject: [PATCH 17/35] noqa --- src/allmydata/storage/crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index a06806d17..129659d27 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -11,7 +11,7 @@ from __future__ import print_function from future.utils import PY2, PY3 if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import os import time From 2fe686135bf9513cbdbbe700e5faa63ee1743e83 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 2 Nov 2021 23:33:54 -0600 Subject: [PATCH 18/35] rename data to appease distutils --- .../data/{lease_checker.history => lease_checker.history.txt} | 0 .../data/{lease_checker.state => lease_checker.state.txt} | 0 src/allmydata/test/test_storage_web.py | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/allmydata/test/data/{lease_checker.history => lease_checker.history.txt} (100%) rename src/allmydata/test/data/{lease_checker.state => lease_checker.state.txt} (100%) diff --git a/src/allmydata/test/data/lease_checker.history b/src/allmydata/test/data/lease_checker.history.txt similarity index 100% rename from src/allmydata/test/data/lease_checker.history rename to src/allmydata/test/data/lease_checker.history.txt diff --git a/src/allmydata/test/data/lease_checker.state b/src/allmydata/test/data/lease_checker.state.txt similarity index 100% rename from src/allmydata/test/data/lease_checker.state rename to src/allmydata/test/data/lease_checker.state.txt diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 70866cba9..269af2203 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1151,7 +1151,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): The crawler can read existing state from the old pickle format """ # this file came from an "in the wild" tahoe version 1.16.0 - original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state") + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state.txt") test_pickle = FilePath("lease_checker.state") with test_pickle.open("w") as local, original_pickle.open("r") as remote: local.write(remote.read()) @@ -1339,7 +1339,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): format """ # this file came from an "in the wild" tahoe version 1.16.0 - original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history") + original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history.txt") test_pickle = FilePath("lease_checker.history") with test_pickle.open("w") as local, original_pickle.open("r") as remote: local.write(remote.read()) From 5855a30e34e1f18d44cbd898dee1b128be2cd976 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 23 Nov 2021 14:01:43 -0700 Subject: [PATCH 19/35] add docstrings --- src/allmydata/storage/crawler.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 129659d27..dcbea909a 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -146,10 +146,17 @@ class _LeaseStateSerializer(object): ) def load(self): + """ + :returns: deserialized JSON state + """ with self._path.open("rb") as f: return json.load(f) def save(self, data): + """ + Serialize the given data as JSON into the state-path + :returns: None + """ tmpfile = self._path.siblingExtension(".tmp") with tmpfile.open("wb") as f: json.dump(data, f) From 49f24893219482a53a882c8139e3c46ffedcd48e Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 15:59:27 -0700 Subject: [PATCH 20/35] explicit 'migrate pickle files' command --- src/allmydata/scripts/admin.py | 55 +++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index a9feed0dd..c125bc9e6 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -18,7 +18,17 @@ except ImportError: pass from twisted.python import usage -from allmydata.scripts.common import BaseOptions +from twisted.python.filepath import ( + FilePath, +) +from allmydata.scripts.common import ( + BaseOptions, + BasedirOptions, +) +from allmydata.storage import ( + crawler, + expirer, +) class GenerateKeypairOptions(BaseOptions): @@ -65,12 +75,54 @@ def derive_pubkey(options): print("public:", str(ed25519.string_from_verifying_key(public_key), "ascii"), file=out) return 0 +class MigrateCrawlerOptions(BasedirOptions): + + def getSynopsis(self): + return "Usage: tahoe [global-options] admin migrate-crawler" + + def getUsage(self, width=None): + t = BasedirOptions.getUsage(self, width) + t += ( + "The crawler data is now stored as JSON to avoid" + " potential security issues with pickle files.\n\nIf" + " you are confident the state files in the 'storage/'" + " subdirectory of your node are trustworthy, run this" + " command to upgrade them to JSON.\n\nThe files are:" + " lease_checker.history, lease_checker.state, and" + " bucket_counter.state" + ) + return t + +def migrate_crawler(options): + out = options.stdout + storage = FilePath(options['basedir']).child("storage") + + conversions = [ + (storage.child("lease_checker.state"), crawler._convert_pickle_state_to_json), + (storage.child("bucket_counter.state"), crawler._convert_pickle_state_to_json), + (storage.child("lease_checker.history"), expirer._convert_pickle_state_to_json), + ] + + for fp, converter in conversions: + existed = fp.exists() + newfp = crawler._maybe_upgrade_pickle_to_json(fp, converter) + if existed: + print("Converted '{}' to '{}'".format(fp.path, newfp.path)) + else: + if newfp.exists(): + print("Already converted: '{}'".format(newfp.path)) + else: + print("Not found: '{}'".format(fp.path)) + + class AdminCommand(BaseOptions): subCommands = [ ("generate-keypair", None, GenerateKeypairOptions, "Generate a public/private keypair, write to stdout."), ("derive-pubkey", None, DerivePubkeyOptions, "Derive a public key from a private key."), + ("migrate-crawler", None, MigrateCrawlerOptions, + "Write the crawler-history data as JSON."), ] def postOptions(self): if not hasattr(self, 'subOptions'): @@ -88,6 +140,7 @@ each subcommand. subDispatch = { "generate-keypair": print_keypair, "derive-pubkey": derive_pubkey, + "migrate-crawler": migrate_crawler, } def do_admin(options): From ce25795e4e86b778ad6cc739fdc83d42d101101e Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 16:00:19 -0700 Subject: [PATCH 21/35] new news --- newsfragments/3825.security | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/newsfragments/3825.security b/newsfragments/3825.security index b16418d2b..df83821de 100644 --- a/newsfragments/3825.security +++ b/newsfragments/3825.security @@ -1,5 +1,6 @@ The lease-checker now uses JSON instead of pickle to serialize its state. -Once you have run this version the lease state files will be stored in JSON -and an older version of the software won't load them (it simply won't notice -them so it will appear to have never run). +tahoe will now refuse to run until you either delete all pickle files or +migrate them using the new command: + + tahoe admin migrate-crawler From 3fd1ca8acbc3d046c97e3c520fdc6fab5d67541d Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 16:00:35 -0700 Subject: [PATCH 22/35] it's an error to have pickle-format files --- src/allmydata/scripts/tahoe_run.py | 16 +++++++++++++++- src/allmydata/storage/crawler.py | 8 ++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 01f1a354c..51be32ee3 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -27,7 +27,9 @@ from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin - +from allmydata.storage.crawler import ( + MigratePickleFileError, +) from allmydata.node import ( PortAssignmentRequired, PrivacyError, @@ -164,6 +166,18 @@ class DaemonizeTheRealService(Service, HookMixin): self.stderr.write("\ntub.port cannot be 0: you must choose.\n\n") elif reason.check(PrivacyError): self.stderr.write("\n{}\n\n".format(reason.value)) + elif reason.check(MigratePickleFileError): + self.stderr.write( + "Error\nAt least one 'pickle' format file exists.\n" + "The file is {}\n" + "You must either delete the pickle-format files" + " or migrate them using the command:\n" + " tahoe admin migrate-crawler --basedir {}\n\n" + .format( + reason.value.args[0].path, + self.basedir, + ) + ) else: self.stderr.write("\nUnknown error\n") reason.printTraceback(self.stderr) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index dcbea909a..2b8cde230 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -27,6 +27,14 @@ class TimeSliceExceeded(Exception): pass +class MigratePickleFileError(Exception): + """ + A pickle-format file exists (the FilePath to the file will be the + single arg). + """ + pass + + def _convert_cycle_data(state): """ :param dict state: cycle-to-date or history-item state From 1b8ae8039e79bf288916edb9f7949f76f943aef4 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 16:01:15 -0700 Subject: [PATCH 23/35] no auto-migrate; produce error if pickle-files exist --- src/allmydata/storage/crawler.py | 31 +++++++++++++++++++++---------- src/allmydata/storage/expirer.py | 14 ++++++-------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 2b8cde230..f63754e10 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -108,7 +108,7 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): :param Callable[dict] convert_pickle: function to change pickle-style state into JSON-style state - :returns unicode: the local path where the state is stored + :returns FilePath: the local path where the state is stored If this state path is JSON, simply return it. @@ -116,14 +116,14 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): JSON path. """ if state_path.path.endswith(".json"): - return state_path.path + return state_path json_state_path = state_path.siblingExtension(".json") # if there's no file there at all, we're done because there's # nothing to upgrade if not state_path.exists(): - return json_state_path.path + return json_state_path # upgrade the pickle data to JSON import pickle @@ -135,7 +135,23 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): # we've written the JSON, delete the pickle state_path.remove() - return json_state_path.path + return json_state_path + + +def _confirm_json_format(fp): + """ + :param FilePath fp: the original (pickle) name of a state file + + This confirms that we do _not_ have the pickle-version of a + state-file and _do_ either have nothing, or the JSON version. If + the pickle-version exists, an exception is raised. + + :returns FilePath: the JSON name of a state file + """ + jsonfp = fp.siblingExtension(".json") + if fp.exists(): + raise MigratePickleFileError(fp) + return jsonfp class _LeaseStateSerializer(object): @@ -146,12 +162,7 @@ class _LeaseStateSerializer(object): """ def __init__(self, state_path): - self._path = FilePath( - _maybe_upgrade_pickle_to_json( - FilePath(state_path), - _convert_pickle_state_to_json, - ) - ) + self._path = _confirm_json_format(FilePath(state_path)) def load(self): """ diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index ad1343ef5..cd0a9369a 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -12,6 +12,8 @@ import os import struct from allmydata.storage.crawler import ( ShareCrawler, + MigratePickleFileError, + _confirm_json_format, _maybe_upgrade_pickle_to_json, _convert_cycle_data, ) @@ -40,17 +42,13 @@ def _convert_pickle_state_to_json(state): class _HistorySerializer(object): """ Serialize the 'history' file of the lease-crawler state. This is - "storage/history.state" for the pickle or - "storage/history.state.json" for the new JSON format. + "storage/lease_checker.history" for the pickle or + "storage/lease_checker.history.json" for the new JSON format. """ def __init__(self, history_path): - self._path = FilePath( - _maybe_upgrade_pickle_to_json( - FilePath(history_path), - _convert_pickle_state_to_json, - ) - ) + self._path = _confirm_json_format(FilePath(history_path)) + if not self._path.exists(): with self._path.open("wb") as f: json.dump({}, f) From 0a4bc385c5ac7c5e9410e83703250b425608be57 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 18:00:58 -0700 Subject: [PATCH 24/35] fix tests to use migrate command --- src/allmydata/scripts/admin.py | 7 ++-- src/allmydata/storage/crawler.py | 2 ++ src/allmydata/test/test_storage_web.py | 45 +++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index c125bc9e6..a6e826174 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -93,6 +93,7 @@ class MigrateCrawlerOptions(BasedirOptions): ) return t + def migrate_crawler(options): out = options.stdout storage = FilePath(options['basedir']).child("storage") @@ -107,12 +108,12 @@ def migrate_crawler(options): existed = fp.exists() newfp = crawler._maybe_upgrade_pickle_to_json(fp, converter) if existed: - print("Converted '{}' to '{}'".format(fp.path, newfp.path)) + print("Converted '{}' to '{}'".format(fp.path, newfp.path), file=out) else: if newfp.exists(): - print("Already converted: '{}'".format(newfp.path)) + print("Already converted: '{}'".format(newfp.path), file=out) else: - print("Not found: '{}'".format(fp.path)) + print("Not found: '{}'".format(fp.path), file=out) class AdminCommand(BaseOptions): diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index f63754e10..a1f70f4e5 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -148,6 +148,8 @@ def _confirm_json_format(fp): :returns FilePath: the JSON name of a state file """ + if fp.path.endswith(".json"): + return fp jsonfp = fp.siblingExtension(".json") if fp.exists(): raise MigratePickleFileError(fp) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 269af2203..86c2382f0 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -19,6 +19,7 @@ import time import os.path import re import json +from six.moves import StringIO from twisted.trial import unittest @@ -45,6 +46,13 @@ from allmydata.web.storage import ( StorageStatusElement, remove_prefix ) +from allmydata.scripts.admin import ( + MigrateCrawlerOptions, + migrate_crawler, +) +from allmydata.scripts.runner import ( + Options, +) from .common_util import FakeCanary from .common_web import ( @@ -1152,15 +1160,29 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): """ # this file came from an "in the wild" tahoe version 1.16.0 original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.state.txt") - test_pickle = FilePath("lease_checker.state") + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + test_pickle = storage.child("lease_checker.state") with test_pickle.open("w") as local, original_pickle.open("r") as remote: local.write(remote.read()) - serial = _LeaseStateSerializer(test_pickle.path) + # convert from pickle format to JSON + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + options.stdout = StringIO() + migrate_crawler(options) # the (existing) state file should have been upgraded to JSON - self.assertNot(test_pickle.exists()) + self.assertFalse(test_pickle.exists()) self.assertTrue(test_pickle.siblingExtension(".json").exists()) + serial = _LeaseStateSerializer(test_pickle.path) self.assertEqual( serial.load(), @@ -1340,10 +1362,25 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): """ # this file came from an "in the wild" tahoe version 1.16.0 original_pickle = FilePath(__file__).parent().child("data").child("lease_checker.history.txt") - test_pickle = FilePath("lease_checker.history") + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + test_pickle = storage.child("lease_checker.history") with test_pickle.open("w") as local, original_pickle.open("r") as remote: local.write(remote.read()) + # convert from pickle format to JSON + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + options.stdout = StringIO() + migrate_crawler(options) + serial = _HistorySerializer(test_pickle.path) self.maxDiff = None From fc9671a8122bd085fa1d4ea74e2d4850abdf529f Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 18:25:32 -0700 Subject: [PATCH 25/35] simplify, flake9 --- src/allmydata/scripts/admin.py | 2 +- src/allmydata/storage/crawler.py | 7 +------ src/allmydata/storage/expirer.py | 2 -- src/allmydata/test/test_storage_web.py | 1 - 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index a6e826174..e0dcc8821 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -106,7 +106,7 @@ def migrate_crawler(options): for fp, converter in conversions: existed = fp.exists() - newfp = crawler._maybe_upgrade_pickle_to_json(fp, converter) + newfp = crawler._upgrade_pickle_to_json(fp, converter) if existed: print("Converted '{}' to '{}'".format(fp.path, newfp.path), file=out) else: diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index a1f70f4e5..dbf4b1300 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -101,7 +101,7 @@ def _convert_pickle_state_to_json(state): } -def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): +def _upgrade_pickle_to_json(state_path, convert_pickle): """ :param FilePath state_path: the filepath to ensure is json @@ -110,14 +110,9 @@ def _maybe_upgrade_pickle_to_json(state_path, convert_pickle): :returns FilePath: the local path where the state is stored - If this state path is JSON, simply return it. - If this state is pickle, convert to the JSON format and return the JSON path. """ - if state_path.path.endswith(".json"): - return state_path - json_state_path = state_path.siblingExtension(".json") # if there's no file there at all, we're done because there's diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index cd0a9369a..abe3c37b6 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -12,9 +12,7 @@ import os import struct from allmydata.storage.crawler import ( ShareCrawler, - MigratePickleFileError, _confirm_json_format, - _maybe_upgrade_pickle_to_json, _convert_cycle_data, ) from allmydata.storage.shares import get_share_file diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 86c2382f0..490a3f775 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -47,7 +47,6 @@ from allmydata.web.storage import ( remove_prefix ) from allmydata.scripts.admin import ( - MigrateCrawlerOptions, migrate_crawler, ) from allmydata.scripts.runner import ( From 679c46451764aae1213239bb90dd25b36bba324e Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 18:43:06 -0700 Subject: [PATCH 26/35] tests --- src/allmydata/test/cli/test_admin.py | 86 ++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/allmydata/test/cli/test_admin.py diff --git a/src/allmydata/test/cli/test_admin.py b/src/allmydata/test/cli/test_admin.py new file mode 100644 index 000000000..bdfc0a46f --- /dev/null +++ b/src/allmydata/test/cli/test_admin.py @@ -0,0 +1,86 @@ +""" +Ported to Python 3. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +from six.moves import StringIO + +from testtools.matchers import ( + Contains, +) + +from twisted.trial import unittest +from twisted.python.filepath import FilePath + +from allmydata.scripts.admin import ( + migrate_crawler, +) +from allmydata.scripts.runner import ( + Options, +) +from ..common import ( + SyncTestCase, +) + +class AdminMigrateCrawler(SyncTestCase): + """ + Tests related to 'tahoe admin migrate-crawler' + """ + + def test_already(self): + """ + We've already migrated; don't do it again. + """ + + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + with storage.child("lease_checker.state.json").open("w") as f: + f.write(b"{}\n") + + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + options.stdout = StringIO() + migrate_crawler(options) + + self.assertThat( + options.stdout.getvalue(), + Contains("Already converted:"), + ) + + def test_usage(self): + """ + We've already migrated; don't do it again. + """ + + root = FilePath(self.mktemp()) + storage = root.child("storage") + storage.makedirs() + with storage.child("lease_checker.state.json").open("w") as f: + f.write(b"{}\n") + + top = Options() + top.parseOptions([ + "admin", "migrate-crawler", + "--basedir", storage.parent().path, + ]) + options = top.subOptions + while hasattr(options, "subOptions"): + options = options.subOptions + self.assertThat( + str(options), + Contains("security issues with pickle") + ) From b47381401c589c056afe89744df5b3f01f2ae5ae Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 19:01:09 -0700 Subject: [PATCH 27/35] flake8 --- src/allmydata/test/cli/test_admin.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/cli/test_admin.py b/src/allmydata/test/cli/test_admin.py index bdfc0a46f..082904652 100644 --- a/src/allmydata/test/cli/test_admin.py +++ b/src/allmydata/test/cli/test_admin.py @@ -16,8 +16,9 @@ from testtools.matchers import ( Contains, ) -from twisted.trial import unittest -from twisted.python.filepath import FilePath +from twisted.python.filepath import ( + FilePath, +) from allmydata.scripts.admin import ( migrate_crawler, From 85fa8fe32e05c68da46f15fe68b69781d05384a7 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 30 Nov 2021 23:00:59 -0700 Subject: [PATCH 28/35] py2/py3 glue code for json dumping --- src/allmydata/storage/crawler.py | 18 ++++++++++++++---- src/allmydata/storage/expirer.py | 7 +++---- src/allmydata/test/test_storage_web.py | 4 ++-- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index dbf4b1300..7516bc4e9 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -125,8 +125,7 @@ def _upgrade_pickle_to_json(state_path, convert_pickle): with state_path.open("rb") as f: state = pickle.load(f) new_state = convert_pickle(state) - with json_state_path.open("wb") as f: - json.dump(new_state, f) + _dump_json_to_file(new_state, json_state_path) # we've written the JSON, delete the pickle state_path.remove() @@ -151,6 +150,18 @@ def _confirm_json_format(fp): return jsonfp +def _dump_json_to_file(js, afile): + """ + Dump the JSON object `js` to the FilePath `afile` + """ + with afile.open("wb") as f: + data = json.dumps(js) + if PY2: + f.write(data) + else: + f.write(data.encode("utf8")) + + class _LeaseStateSerializer(object): """ Read and write state for LeaseCheckingCrawler. This understands @@ -174,8 +185,7 @@ class _LeaseStateSerializer(object): :returns: None """ tmpfile = self._path.siblingExtension(".tmp") - with tmpfile.open("wb") as f: - json.dump(data, f) + _dump_json_to_file(data, tmpfile) fileutil.move_into_place(tmpfile.path, self._path.path) return None diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py index abe3c37b6..55ab51843 100644 --- a/src/allmydata/storage/expirer.py +++ b/src/allmydata/storage/expirer.py @@ -14,6 +14,7 @@ from allmydata.storage.crawler import ( ShareCrawler, _confirm_json_format, _convert_cycle_data, + _dump_json_to_file, ) from allmydata.storage.shares import get_share_file from allmydata.storage.common import UnknownMutableContainerVersionError, \ @@ -48,8 +49,7 @@ class _HistorySerializer(object): self._path = _confirm_json_format(FilePath(history_path)) if not self._path.exists(): - with self._path.open("wb") as f: - json.dump({}, f) + _dump_json_to_file({}, self._path) def load(self): """ @@ -65,8 +65,7 @@ class _HistorySerializer(object): """ Serialize the existing data as JSON. """ - with self._path.open("wb") as f: - json.dump(new_history, f) + _dump_json_to_file(new_history, self._path) return None diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 490a3f775..dff3b36f5 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1163,7 +1163,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.state") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: local.write(remote.read()) # convert from pickle format to JSON @@ -1365,7 +1365,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.history") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: local.write(remote.read()) # convert from pickle format to JSON From 25ca767095019f3f0d6288b2a870ef3b98eef112 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 1 Dec 2021 11:49:52 -0700 Subject: [PATCH 29/35] an offering to the windows godesses --- src/allmydata/test/test_storage_web.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index dff3b36f5..282fb67e1 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -22,11 +22,11 @@ import json from six.moves import StringIO from twisted.trial import unittest - from twisted.internet import defer from twisted.application import service from twisted.web.template import flattenString from twisted.python.filepath import FilePath +from twisted.python.runtime import platform from foolscap.api import fireEventually from allmydata.util import fileutil, hashutil, base32, pollmixin @@ -1163,8 +1163,12 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.state") - with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: - local.write(remote.read()) + with test_pickle.open("w") as local, original_pickle.open("r") as remote: + for line in remote.readlines(): + if platform.isWindows(): + local.write(line.replace("\n", "\r\n")) + else: + local.write(line.replace("\n", "\r\n")) # convert from pickle format to JSON top = Options() @@ -1366,7 +1370,11 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage.makedirs() test_pickle = storage.child("lease_checker.history") with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: - local.write(remote.read()) + for line in remote.readlines(): + if platform.isWindows(): + local.write(line.replace("\n", "\r\n")) + else: + local.write(line) # convert from pickle format to JSON top = Options() From 7080ee6fc7747e1a9ca10ddb47fe81fd5e96a37b Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 1 Dec 2021 12:02:06 -0700 Subject: [PATCH 30/35] oops --- src/allmydata/test/test_storage_web.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 282fb67e1..1cf96d660 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1168,7 +1168,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): if platform.isWindows(): local.write(line.replace("\n", "\r\n")) else: - local.write(line.replace("\n", "\r\n")) + local.write(line) # convert from pickle format to JSON top = Options() From 940c6343cf32318b9ba72f1330fdd5371346ce1f Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 1 Dec 2021 12:02:42 -0700 Subject: [PATCH 31/35] consistency --- src/allmydata/test/test_storage_web.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 1cf96d660..961bbef98 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1369,7 +1369,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.history") - with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: + with test_pickle.open("w") as local, original_pickle.open("r") as remote: for line in remote.readlines(): if platform.isWindows(): local.write(line.replace("\n", "\r\n")) From e0092ededaa64a800058658de2d9ab8472acb3bf Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 1 Dec 2021 20:52:22 -0700 Subject: [PATCH 32/35] fine, just skip tests on windows --- src/allmydata/test/test_storage_web.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 961bbef98..a49b71325 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -19,6 +19,7 @@ import time import os.path import re import json +from unittest import skipIf from six.moves import StringIO from twisted.trial import unittest @@ -1153,6 +1154,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): d.addBoth(_cleanup) return d + @skipIf(platform.isWindows()) def test_deserialize_pickle(self): """ The crawler can read existing state from the old pickle format @@ -1163,12 +1165,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.state") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: - for line in remote.readlines(): - if platform.isWindows(): - local.write(line.replace("\n", "\r\n")) - else: - local.write(line) + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: + test_pickle.write(original_pickle.read()) # convert from pickle format to JSON top = Options() @@ -1358,6 +1356,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): second_serial.load(), ) + @skipIf(platform.isWindows()) def test_deserialize_history_pickle(self): """ The crawler can read existing history state from the old pickle @@ -1369,12 +1368,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage = root.child("storage") storage.makedirs() test_pickle = storage.child("lease_checker.history") - with test_pickle.open("w") as local, original_pickle.open("r") as remote: - for line in remote.readlines(): - if platform.isWindows(): - local.write(line.replace("\n", "\r\n")) - else: - local.write(line) + with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: + test_pickle.write(original_pickle.read()) # convert from pickle format to JSON top = Options() From 40e7be6d8d7581f4c9fa71c0817207e11ac1a7e6 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 1 Dec 2021 23:46:10 -0700 Subject: [PATCH 33/35] needs reason --- src/allmydata/test/test_storage_web.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index a49b71325..9292c0b20 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1154,7 +1154,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): d.addBoth(_cleanup) return d - @skipIf(platform.isWindows()) + @skipIf(platform.isWindows(), "pickle test-data can't be loaded on windows") def test_deserialize_pickle(self): """ The crawler can read existing state from the old pickle format @@ -1356,7 +1356,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): second_serial.load(), ) - @skipIf(platform.isWindows()) + @skipIf(platform.isWindows(), "pickle test-data can't be loaded on windows") def test_deserialize_history_pickle(self): """ The crawler can read existing history state from the old pickle From 4bc0df7cc14f53901470a3e0d0f78a6d975c4781 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 2 Dec 2021 00:05:21 -0700 Subject: [PATCH 34/35] file, not path --- src/allmydata/test/test_storage_web.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 9292c0b20..18ea0220c 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -1166,7 +1166,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage.makedirs() test_pickle = storage.child("lease_checker.state") with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: - test_pickle.write(original_pickle.read()) + local.write(remote.read()) # convert from pickle format to JSON top = Options() @@ -1369,7 +1369,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin): storage.makedirs() test_pickle = storage.child("lease_checker.history") with test_pickle.open("wb") as local, original_pickle.open("rb") as remote: - test_pickle.write(original_pickle.read()) + local.write(remote.read()) # convert from pickle format to JSON top = Options() From 53ff16f1a43717dd86e9582c379beb4d92ea17e9 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 2 Dec 2021 12:56:52 -0700 Subject: [PATCH 35/35] rst for news --- newsfragments/3825.security | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/newsfragments/3825.security b/newsfragments/3825.security index df83821de..3d112dd49 100644 --- a/newsfragments/3825.security +++ b/newsfragments/3825.security @@ -1,6 +1,8 @@ The lease-checker now uses JSON instead of pickle to serialize its state. tahoe will now refuse to run until you either delete all pickle files or -migrate them using the new command: +migrate them using the new command:: tahoe admin migrate-crawler + +This will migrate all crawler-related pickle files.