From 2e5662aa91cacf6ad36d1ea619ea08ea799591c7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Aug 2022 13:11:06 -0400 Subject: [PATCH 01/91] Temporarily enforce requirement that allocated size matches actual size of an immutable. --- src/allmydata/storage/immutable.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index f7f5aebce..6fcca3871 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -419,14 +419,19 @@ class BucketWriter(object): self._already_written.set(True, offset, end) self.ss.add_latency("write", self._clock.seconds() - start) self.ss.count("write") + return self._is_finished() - # Return whether the whole thing has been written. See - # https://github.com/mlenzen/collections-extended/issues/169 and - # https://github.com/mlenzen/collections-extended/issues/172 for why - # it's done this way. + def _is_finished(self): + """ + Return whether the whole thing has been written. + """ return sum([mr.stop - mr.start for mr in self._already_written.ranges()]) == self._max_size def close(self): + # TODO this can't actually be enabled, because it's not backwards + # compatible. But it's useful for testing, so leaving it on until the + # branch is ready for merge. + assert self._is_finished() precondition(not self.closed) self._timeout.cancel() start = self._clock.seconds() From 556606271dbde1ccfe47ba29cb3767985286a7b4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Aug 2022 13:11:45 -0400 Subject: [PATCH 02/91] News file. --- newsfragments/3915.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3915.minor diff --git a/newsfragments/3915.minor b/newsfragments/3915.minor new file mode 100644 index 000000000..e69de29bb From d50c98a1e95b912c1cbd04d4bf932117176f5ac0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Aug 2022 14:34:40 -0400 Subject: [PATCH 03/91] Calculate URI extension size upfront, instead of hand-waving with a larger value. --- src/allmydata/immutable/encode.py | 18 ++++++++++++++++++ src/allmydata/immutable/layout.py | 16 +++++++--------- src/allmydata/immutable/upload.py | 27 ++++++++++++--------------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 42fc18077..c7887b7ba 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -624,6 +624,7 @@ class Encoder(object): for k in ('crypttext_root_hash', 'crypttext_hash', ): assert k in self.uri_extension_data + self.uri_extension_data uri_extension = uri.pack_extension(self.uri_extension_data) ed = {} for k,v in self.uri_extension_data.items(): @@ -694,3 +695,20 @@ class Encoder(object): return self.uri_extension_data def get_uri_extension_hash(self): return self.uri_extension_hash + + def get_uri_extension_size(self): + """ + Calculate the size of the URI extension that gets written at the end of + immutables. + + This may be done earlier than actual encoding, so e.g. we might not + know the crypttext hashes, but that's fine for our purposes since we + only care about the length. 
+ """ + params = self.uri_extension_data.copy() + assert params + params["crypttext_hash"] = b"\x00" * 32 + params["crypttext_root_hash"] = b"\x00" * 32 + params["share_root_hash"] = b"\x00" * 32 + uri_extension = uri.pack_extension(params) + return len(uri_extension) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 79c886237..74af09a2b 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -90,7 +90,7 @@ FORCE_V2 = False # set briefly by unit tests to make small-sized V2 shares def make_write_bucket_proxy(rref, server, data_size, block_size, num_segments, - num_share_hashes, uri_extension_size_max): + num_share_hashes, uri_extension_size): # Use layout v1 for small files, so they'll be readable by older versions # (= 2**32 or data_size >= 2**32: @@ -233,8 +232,7 @@ class WriteBucketProxy(object): def put_uri_extension(self, data): offset = self._offsets['uri_extension'] assert isinstance(data, bytes) - precondition(len(data) <= self._uri_extension_size_max, - len(data), self._uri_extension_size_max) + precondition(len(data) == self._uri_extension_size) length = struct.pack(self.fieldstruct, len(data)) return self._write(offset, length+data) diff --git a/src/allmydata/immutable/upload.py b/src/allmydata/immutable/upload.py index cb332dfdf..6b9b48f6a 100644 --- a/src/allmydata/immutable/upload.py +++ b/src/allmydata/immutable/upload.py @@ -242,31 +242,26 @@ class UploadResults(object): def get_verifycapstr(self): return self._verifycapstr -# our current uri_extension is 846 bytes for small files, a few bytes -# more for larger ones (since the filesize is encoded in decimal in a -# few places). Ask for a little bit more just in case we need it. If -# the extension changes size, we can change EXTENSION_SIZE to -# allocate a more accurate amount of space. -EXTENSION_SIZE = 1000 -# TODO: actual extensions are closer to 419 bytes, so we can probably lower -# this. 
def pretty_print_shnum_to_servers(s): return ', '.join([ "sh%s: %s" % (k, '+'.join([idlib.shortnodeid_b2a(x) for x in v])) for k, v in s.items() ]) + class ServerTracker(object): def __init__(self, server, sharesize, blocksize, num_segments, num_share_hashes, storage_index, - bucket_renewal_secret, bucket_cancel_secret): + bucket_renewal_secret, bucket_cancel_secret, + uri_extension_size): self._server = server self.buckets = {} # k: shareid, v: IRemoteBucketWriter self.sharesize = sharesize + self.uri_extension_size = uri_extension_size wbp = layout.make_write_bucket_proxy(None, None, sharesize, blocksize, num_segments, num_share_hashes, - EXTENSION_SIZE) + uri_extension_size) self.wbp_class = wbp.__class__ # to create more of them self.allocated_size = wbp.get_allocated_size() self.blocksize = blocksize @@ -314,7 +309,7 @@ class ServerTracker(object): self.blocksize, self.num_segments, self.num_share_hashes, - EXTENSION_SIZE) + self.uri_extension_size) b[sharenum] = bp self.buckets.update(b) return (alreadygot, set(b.keys())) @@ -487,7 +482,7 @@ class Tahoe2ServerSelector(log.PrefixingLogMixin): def get_shareholders(self, storage_broker, secret_holder, storage_index, share_size, block_size, num_segments, total_shares, needed_shares, - min_happiness): + min_happiness, uri_extension_size): """ @return: (upload_trackers, already_serverids), where upload_trackers is a set of ServerTracker instances that have agreed to hold @@ -529,7 +524,8 @@ class Tahoe2ServerSelector(log.PrefixingLogMixin): # figure out how much space to ask for wbp = layout.make_write_bucket_proxy(None, None, share_size, 0, num_segments, - num_share_hashes, EXTENSION_SIZE) + num_share_hashes, + uri_extension_size) allocated_size = wbp.get_allocated_size() # decide upon the renewal/cancel secrets, to include them in the @@ -554,7 +550,7 @@ class Tahoe2ServerSelector(log.PrefixingLogMixin): def _create_server_tracker(server, renew, cancel): return ServerTracker( server, share_size, block_size, num_segments, num_share_hashes, - storage_index, renew, cancel, + storage_index, renew, cancel, uri_extension_size ) readonly_trackers, write_trackers = self._create_trackers( @@ -1326,7 +1322,8 @@ class CHKUploader(object): d = server_selector.get_shareholders(storage_broker, secret_holder, storage_index, share_size, block_size, - num_segments, n, k, desired) + num_segments, n, k, desired, + encoder.get_uri_extension_size()) def _done(res): self._server_selection_elapsed = time.time() - server_selection_started return res From 7aa97336a0fe7b8bda7de38e2c639b142e50f494 Mon Sep 17 00:00:00 2001 From: "Fon E. Noel NFEBE" Date: Wed, 17 Aug 2022 16:03:03 +0100 Subject: [PATCH 04/91] Refactor FakeWebTest & MemoryConsumerTest classes There are base test classes namely `SyncTestCase` and `AsyncTestCase` which we would like all test classes in this code base to extend. This commit refactors two test classes to use the `SyncTestCase` with the newer assert methods. Signed-off-by: Fon E. 
Noel NFEBE --- newsfragments/3916.minor | 0 src/allmydata/test/test_consumer.py | 24 +++++++++++++++--------- src/allmydata/test/test_testing.py | 7 ++++--- 3 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 newsfragments/3916.minor diff --git a/newsfragments/3916.minor b/newsfragments/3916.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/test/test_consumer.py b/src/allmydata/test/test_consumer.py index a689de462..234fc2594 100644 --- a/src/allmydata/test/test_consumer.py +++ b/src/allmydata/test/test_consumer.py @@ -14,11 +14,17 @@ if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from zope.interface import implementer -from twisted.trial.unittest import TestCase from twisted.internet.interfaces import IPushProducer, IPullProducer from allmydata.util.consumer import MemoryConsumer +from .common import ( + SyncTestCase, +) +from testtools.matchers import ( + Equals, +) + @implementer(IPushProducer) @implementer(IPullProducer) @@ -50,7 +56,7 @@ class Producer(object): self.consumer.unregisterProducer() -class MemoryConsumerTests(TestCase): +class MemoryConsumerTests(SyncTestCase): """Tests for MemoryConsumer.""" def test_push_producer(self): @@ -60,14 +66,14 @@ class MemoryConsumerTests(TestCase): consumer = MemoryConsumer() producer = Producer(consumer, [b"abc", b"def", b"ghi"]) consumer.registerProducer(producer, True) - self.assertEqual(consumer.chunks, [b"abc"]) + self.assertThat(consumer.chunks, Equals([b"abc"])) producer.iterate() producer.iterate() - self.assertEqual(consumer.chunks, [b"abc", b"def", b"ghi"]) - self.assertEqual(consumer.done, False) + self.assertThat(consumer.chunks, Equals([b"abc", b"def", b"ghi"])) + self.assertFalse(consumer.done) producer.iterate() - self.assertEqual(consumer.chunks, [b"abc", b"def", b"ghi"]) - self.assertEqual(consumer.done, True) + self.assertThat(consumer.chunks, Equals([b"abc", b"def", b"ghi"])) + self.assertTrue(consumer.done) def test_pull_producer(self): """ @@ -76,8 +82,8 @@ class MemoryConsumerTests(TestCase): consumer = MemoryConsumer() producer = Producer(consumer, [b"abc", b"def", b"ghi"]) consumer.registerProducer(producer, False) - self.assertEqual(consumer.chunks, [b"abc", b"def", b"ghi"]) - self.assertEqual(consumer.done, True) + self.assertThat(consumer.chunks, Equals([b"abc", b"def", b"ghi"])) + self.assertTrue(consumer.done) # download_to_data() is effectively tested by some of the filenode tests, e.g. diff --git a/src/allmydata/test/test_testing.py b/src/allmydata/test/test_testing.py index 527b235bd..3715d1aca 100644 --- a/src/allmydata/test/test_testing.py +++ b/src/allmydata/test/test_testing.py @@ -46,9 +46,10 @@ from hypothesis.strategies import ( binary, ) -from testtools import ( - TestCase, +from .common import ( + SyncTestCase, ) + from testtools.matchers import ( Always, Equals, @@ -61,7 +62,7 @@ from testtools.twistedsupport import ( ) -class FakeWebTest(TestCase): +class FakeWebTest(SyncTestCase): """ Test the WebUI verified-fakes infrastucture """ From c9084a2a45fb16cc90d4f6043017cbc57ba463a9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 12:49:06 -0400 Subject: [PATCH 05/91] Disable assertion we can't, sadly, enable. 
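The assertion is right in principle: by the time close() arrives, every
allocated byte should have been written. But released clients allocate more
than they write -- the old uploader reserves EXTENSION_SIZE (1000) bytes for
a URI extension that is actually closer to 419 bytes, so the written ranges
never cover the allocated size. A rough sketch of the mismatch, with
illustrative numbers only (share_data is a hypothetical stand-in for the
rest of the share):

    allocated = share_data + 1000   # old client reserves EXTENSION_SIZE
    written   = share_data + 419    # but only writes the real UEB bytes
    # _is_finished() sums the written ranges and compares the total to
    # self._max_size, so it would stay False forever for such an upload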
--- src/allmydata/storage/immutable.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 6fcca3871..a02fd3bb2 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -428,10 +428,9 @@ class BucketWriter(object): return sum([mr.stop - mr.start for mr in self._already_written.ranges()]) == self._max_size def close(self): - # TODO this can't actually be enabled, because it's not backwards - # compatible. But it's useful for testing, so leaving it on until the - # branch is ready for merge. - assert self._is_finished() + # This can't actually be enabled, because it's not backwards compatible + # with old Foolscap clients. + # assert self._is_finished() precondition(not self.closed) self._timeout.cancel() start = self._clock.seconds() From 9d03c476d196c78d7ca5ac36e57c3f1b6c1434b0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 12:49:45 -0400 Subject: [PATCH 06/91] Make sure we write all the bytes we say we're sending. --- src/allmydata/immutable/layout.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 74af09a2b..30ab985a8 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -118,6 +118,7 @@ class WriteBucketProxy(object): self._data_size = data_size self._block_size = block_size self._num_segments = num_segments + self._written_bytes = 0 effective_segments = mathutil.next_power_of_k(num_segments,2) self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE @@ -194,6 +195,11 @@ class WriteBucketProxy(object): return self._write(offset, data) def put_crypttext_hashes(self, hashes): + # plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and + # so is not explicitly written, but we need to write everything, so + # fill it in with nulls. + self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) + offset = self._offsets['crypttext_hash_tree'] assert isinstance(hashes, list) data = b"".join(hashes) @@ -242,11 +248,12 @@ class WriteBucketProxy(object): # would reduce the foolscap CPU overhead per share, but wouldn't # reduce the number of round trips, so it might not be worth the # effort. - + self._written_bytes += len(data) return self._pipeline.add(len(data), self._rref.callRemote, "write", offset, data) def close(self): + assert self._written_bytes == self.get_allocated_size(), f"{self._written_bytes} != {self.get_allocated_size()}" d = self._pipeline.add(0, self._rref.callRemote, "close") d.addCallback(lambda ign: self._pipeline.flush()) return d From 3464637bbb1de4a739d87a14d95b0a300c326063 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 12:54:26 -0400 Subject: [PATCH 07/91] Fix unit tests. 
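The bucket-proxy tests use the renamed keyword directly, and they now have
to supply an exact value: put_uri_extension() requires
len(data) == uri_extension_size instead of accepting anything up to a
maximum. A test that writes a real extension can derive the number from the
bytes it is about to write; a sketch of the pattern, using the visible test
parameters (the rref/server/data_size setup is assumed):

    uri_extension = uri.pack_extension(uri_extension_data)
    bp = make_write_bucket_proxy(rref, server, data_size,
                                 block_size=25, num_segments=4,
                                 num_share_hashes=3,
                                 uri_extension_size=len(uri_extension))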
--- src/allmydata/test/test_storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index c3f2a35e1..134609f81 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -463,7 +463,7 @@ class BucketProxy(unittest.TestCase): block_size=10, num_segments=5, num_share_hashes=3, - uri_extension_size_max=500) + uri_extension_size=500) self.failUnless(interfaces.IStorageBucketWriter.providedBy(bp), bp) def _do_test_readwrite(self, name, header_size, wbp_class, rbp_class): @@ -494,7 +494,7 @@ class BucketProxy(unittest.TestCase): block_size=25, num_segments=4, num_share_hashes=3, - uri_extension_size_max=len(uri_extension)) + uri_extension_size=len(uri_extension)) d = bp.put_header() d.addCallback(lambda res: bp.put_block(0, b"a"*25)) From cd81e5a01c82796fd3d69c93fb7f088ad6bf2a3b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 13:13:22 -0400 Subject: [PATCH 08/91] Hint for future debugging. --- src/allmydata/storage/immutable.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index a02fd3bb2..0893513ae 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -397,7 +397,9 @@ class BucketWriter(object): """ Write data at given offset, return whether the upload is complete. """ - # Delay the timeout, since we received data: + # Delay the timeout, since we received data; if we get an + # AlreadyCancelled error, that means there's a bug in the client and + # write() was called after close(). self._timeout.reset(30 * 60) start = self._clock.seconds() precondition(not self.closed) From 92662d802cf0e43a5a5f684faba455de7a6cde53 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 13:15:13 -0400 Subject: [PATCH 09/91] Don't drop a Deferred on the ground. --- src/allmydata/immutable/layout.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 30ab985a8..de390bda9 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -198,8 +198,11 @@ class WriteBucketProxy(object): # plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and # so is not explicitly written, but we need to write everything, so # fill it in with nulls. - self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) + d = self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) + d.addCallback(lambda _: self._really_put_crypttext_hashes(hashes)) + return d + def _really_put_crypttext_hashes(self, hashes): offset = self._offsets['crypttext_hash_tree'] assert isinstance(hashes, list) data = b"".join(hashes) From bdb4aac0de1bd03fb8625e156135d4f0964e478c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 13:15:27 -0400 Subject: [PATCH 10/91] Pass in the missing argument. 
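get_shareholders() gained a uri_extension_size parameter when the allocated
size stopped being padded, and this test calls it directly, so it must pass
one as well. The encoder can produce the exact number before any encoding
happens, because only the *lengths* of the hash fields matter; condensed
from get_uri_extension_size() in encode.py:

    params = self.uri_extension_data.copy()
    params["crypttext_hash"] = b"\x00" * 32       # placeholder, real length
    params["crypttext_root_hash"] = b"\x00" * 32
    params["share_root_hash"] = b"\x00" * 32
    return len(uri.pack_extension(params))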
--- src/allmydata/test/test_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 8d5435e88..18192de6c 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -983,7 +983,7 @@ class EncodingParameters(GridTestMixin, unittest.TestCase, SetDEPMixin, num_segments = encoder.get_param("num_segments") d = selector.get_shareholders(broker, sh, storage_index, share_size, block_size, num_segments, - 10, 3, 4) + 10, 3, 4, encoder.get_uri_extension_size()) def _have_shareholders(upload_trackers_and_already_servers): (upload_trackers, already_servers) = upload_trackers_and_already_servers assert servers_to_break <= len(upload_trackers) From 9975fddd88d263a58e146b91b2c22b5b53500a85 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 8 Sep 2022 13:42:19 -0400 Subject: [PATCH 11/91] Get rid of garbage. --- src/allmydata/immutable/encode.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index c7887b7ba..34a9c2472 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -624,7 +624,6 @@ class Encoder(object): for k in ('crypttext_root_hash', 'crypttext_hash', ): assert k in self.uri_extension_data - self.uri_extension_data uri_extension = uri.pack_extension(self.uri_extension_data) ed = {} for k,v in self.uri_extension_data.items(): From c82bb5f21c90e293c1507c71aa68fb4768b3abb6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 8 Sep 2022 13:44:22 -0400 Subject: [PATCH 12/91] Use a more meaningful constant. --- src/allmydata/immutable/encode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 34a9c2472..3c4440486 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -706,8 +706,8 @@ class Encoder(object): """ params = self.uri_extension_data.copy() assert params - params["crypttext_hash"] = b"\x00" * 32 - params["crypttext_root_hash"] = b"\x00" * 32 - params["share_root_hash"] = b"\x00" * 32 + params["crypttext_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE + params["crypttext_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE + params["share_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE uri_extension = uri.pack_extension(params) return len(uri_extension) From 6310774b8267d2deb0620c1766bc7f93694a3803 Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Thu, 8 Sep 2022 17:50:58 +0000 Subject: [PATCH 13/91] Add documentation on OpenMetrics statistics endpoint. references ticket:3786 --- docs/stats.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/stats.rst b/docs/stats.rst index 50642d816..c7d69e0d2 100644 --- a/docs/stats.rst +++ b/docs/stats.rst @@ -264,3 +264,18 @@ the "tahoe-conf" file for notes about configuration and installing these plugins into a Munin environment. .. _Munin: http://munin-monitoring.org/ + + +Scraping Stats Values in OpenMetrics Format +=========================================== + +Time Series DataBase (TSDB) software like Prometheus_ and VictoriaMetrics_ can +parse statistics from the e.g. http://localhost:3456/statistics?t=openmetrics +URL in OpenMetrics_ format. Software like Grafana_ can then be used to graph +and alert on these numbers. You can find a pre-configured dashboard for +Grafana at https://grafana.com/grafana/dashboards/16894-tahoe-lafs/. + +.. 
_OpenMetrics: https://openmetrics.io/ +.. _Prometheus: https://prometheus.io/ +.. _VictoriaMetrics: https://victoriametrics.com/ +.. _Grafana: https://grafana.com/ From ae21ab74a2afb8a0db234e76e2d09e76df0f5958 Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Thu, 8 Sep 2022 18:05:59 +0000 Subject: [PATCH 14/91] Add newsfragment for the added documentation. --- newsfragments/3786.minor | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3786.minor diff --git a/newsfragments/3786.minor b/newsfragments/3786.minor new file mode 100644 index 000000000..ecd1a2c4e --- /dev/null +++ b/newsfragments/3786.minor @@ -0,0 +1 @@ +Added re-structured text documentation for the OpenMetrics format statistics endpoint. From 373a5328293693612123e7e47be4c01e5de3746b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 09:36:56 -0400 Subject: [PATCH 15/91] Detect corrupted UEB length more consistently. --- src/allmydata/immutable/layout.py | 8 ++++---- src/allmydata/test/test_repairer.py | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index de390bda9..6679fc94c 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -495,10 +495,10 @@ class ReadBucketProxy(object): if len(data) != self._fieldsize: raise LayoutInvalid("not enough bytes to encode URI length -- should be %d bytes long, not %d " % (self._fieldsize, len(data),)) length = struct.unpack(self._fieldstruct, data)[0] - if length >= 2**31: - # URI extension blocks are around 419 bytes long, so this - # must be corrupted. Anyway, the foolscap interface schema - # for "read" will not allow >= 2**31 bytes length. + if length >= 2000: + # URI extension blocks are around 419 bytes long; in previous + # versions of the code 1000 was used as a default catchall. So + # 2000 or more must be corrupted. raise RidiculouslyLargeURIExtensionBlock(length) return self._read(offset+self._fieldsize, length) diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index f9b93af72..8545b1cf4 100644 --- a/src/allmydata/test/test_repairer.py +++ b/src/allmydata/test/test_repairer.py @@ -251,6 +251,12 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin): self.judge_invisible_corruption) def test_corrupt_ueb(self): + # Note that in some rare situations this might fail, specifically if + # the length of the UEB is corrupted to be a value that is bigger than + # the size but less than 2000, it might not get caught... But that's + # mostly because in that case it doesn't meaningfully corrupt it. See + # _get_uri_extension_the_old_way() in layout.py for where the 2000 + # number comes from. self.basedir = "repairer/Verifier/corrupt_ueb" return self._help_test_verify(common._corrupt_uri_extension, self.judge_invisible_corruption) From 8d5f08771a4f73f09611500c39627334f7273fc9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 09:45:46 -0400 Subject: [PATCH 16/91] Minimal check on parameters' contents. --- src/allmydata/immutable/encode.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 3c4440486..874492785 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -705,9 +705,13 @@ class Encoder(object): only care about the length. 
""" params = self.uri_extension_data.copy() - assert params params["crypttext_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE params["crypttext_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE params["share_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE + assert params.keys() == { + "codec_name", "codec_params", "size", "segment_size", "num_segments", + "needed_shares", "total_shares", "tail_codec_params", + "crypttext_hash", "crypttext_root_hash", "share_root_hash" + }, params.keys() uri_extension = uri.pack_extension(params) return len(uri_extension) From 00972ba3c6d8b9b83eb8c069ec1c9fa5768aaed3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 09:59:36 -0400 Subject: [PATCH 17/91] Match latest GBS spec. --- docs/specifications/url.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 31fb05fad..a9e37a0ec 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -103,11 +103,8 @@ Version 1 The hash component of a version 1 NURL differs in three ways from the prior version. -1. The hash function used is SHA3-224 instead of SHA1. - The security of SHA1 `continues to be eroded`_. - Contrariwise SHA3 is currently the most recent addition to the SHA family by NIST. - The 224 bit instance is chosen to keep the output short and because it offers greater collision resistance than SHA1 was thought to offer even at its inception - (prior to security research showing actual collision resistance is lower). +1. The hash function used is SHA-256, to match RFC 7469. + The security of SHA1 `continues to be eroded`_; Latacora `SHA-2`_. 2. The hash is computed over the certificate's SPKI instead of the whole certificate. This allows certificate re-generation so long as the public key remains the same. This is useful to allow contact information to be updated or extension of validity period. @@ -140,7 +137,8 @@ Examples * ``pb://azEu8vlRpnEeYm0DySQDeNY3Z2iJXHC_bsbaAw@localhost:47877/64i4aokv4ej#v=1`` .. _`continues to be eroded`: https://en.wikipedia.org/wiki/SHA-1#Cryptanalysis_and_validation -.. _`explored by the web community`: https://www.imperialviolet.org/2011/05/04/pinning.html +.. _`SHA-2`: https://latacora.micro.blog/2018/04/03/cryptographic-right-answers.html +.. _`explored by the web community`: https://www.rfc-editor.org/rfc/rfc7469 .. _Foolscap: https://github.com/warner/foolscap .. [1] ``foolscap.furl.decode_furl`` is taken as the canonical definition of the syntax of a fURL. From 1759eacee3c4cbdb72d956bf1df2c35f7fc435bb Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:09:25 -0400 Subject: [PATCH 18/91] No need to include NURL. 
--- docs/proposed/http-storage-node-protocol.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 3dac376ff..b601a785b 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -409,8 +409,7 @@ For example:: "tolerates-immutable-read-overrun": true, "delete-mutable-shares-with-zero-length-writev": true, "fills-holes-with-zero-bytes": true, - "prevents-read-past-end-of-share-data": true, - "gbs-anonymous-storage-url": "pb://...#v=1" + "prevents-read-past-end-of-share-data": true }, "application-version": "1.13.0" } From 0d97847ef5c4625d972dd92e29f9a8187f97a6b2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:09:50 -0400 Subject: [PATCH 19/91] News file. --- newsfragments/3904.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3904.minor diff --git a/newsfragments/3904.minor b/newsfragments/3904.minor new file mode 100644 index 000000000..e69de29bb From b1aa93e02234bae93efac860a0078d5a1c089d2a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:34:59 -0400 Subject: [PATCH 20/91] Switch prefix. --- docs/proposed/http-storage-node-protocol.rst | 48 ++++++++++---------- src/allmydata/storage/http_client.py | 28 ++++++++---- src/allmydata/storage/http_server.py | 27 ++++++----- src/allmydata/test/test_storage_http.py | 8 ++-- 4 files changed, 61 insertions(+), 50 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index b601a785b..ec800367c 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -395,7 +395,7 @@ Encoding General ~~~~~~~ -``GET /v1/version`` +``GET /storage/v1/version`` !!!!!!!!!!!!!!!!!!! Retrieve information about the version of the storage server. @@ -414,7 +414,7 @@ For example:: "application-version": "1.13.0" } -``PUT /v1/lease/:storage_index`` +``PUT /storage/v1/lease/:storage_index`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Either renew or create a new lease on the bucket addressed by ``storage_index``. @@ -467,7 +467,7 @@ Immutable Writing ~~~~~~~ -``POST /v1/immutable/:storage_index`` +``POST /storage/v1/immutable/:storage_index`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Initialize an immutable storage index with some buckets. @@ -503,7 +503,7 @@ Handling repeat calls: Discussion `````````` -We considered making this ``POST /v1/immutable`` instead. +We considered making this ``POST /storage/v1/immutable`` instead. The motivation was to keep *storage index* out of the request URL. Request URLs have an elevated chance of being logged by something. We were concerned that having the *storage index* logged may increase some risks. @@ -538,7 +538,7 @@ Rejected designs for upload secrets: it must contain randomness. Randomness means there is no need to have a secret per share, since adding share-specific content to randomness doesn't actually make the secret any better. -``PATCH /v1/immutable/:storage_index/:share_number`` +``PATCH /storage/v1/immutable/:storage_index/:share_number`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Write data for the indicated share. @@ -579,7 +579,7 @@ Responses: the response is ``CONFLICT``. At this point the only thing to do is abort the upload and start from scratch (see below). 
-``PUT /v1/immutable/:storage_index/:share_number/abort`` +``PUT /storage/v1/immutable/:storage_index/:share_number/abort`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! This cancels an *in-progress* upload. @@ -615,7 +615,7 @@ From RFC 7231:: PATCH method defined in [RFC5789]). -``POST /v1/immutable/:storage_index/:share_number/corrupt`` +``POST /storage/v1/immutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. The @@ -634,7 +634,7 @@ couldn't be found. Reading ~~~~~~~ -``GET /v1/immutable/:storage_index/shares`` +``GET /storage/v1/immutable/:storage_index/shares`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a list (semantically, a set) indicating all shares available for the @@ -644,7 +644,7 @@ indicated storage index. For example:: An unknown storage index results in an empty list. -``GET /v1/immutable/:storage_index/:share_number`` +``GET /storage/v1/immutable/:storage_index/:share_number`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Read a contiguous sequence of bytes from one share in one bucket. @@ -685,7 +685,7 @@ Mutable Writing ~~~~~~~ -``POST /v1/mutable/:storage_index/read-test-write`` +``POST /storage/v1/mutable/:storage_index/read-test-write`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! General purpose read-test-and-write operation for mutable storage indexes. @@ -741,7 +741,7 @@ As a result, if there is no data at all, an empty bytestring is returned no matt Reading ~~~~~~~ -``GET /v1/mutable/:storage_index/shares`` +``GET /storage/v1/mutable/:storage_index/shares`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a set indicating all shares available for the indicated storage index. @@ -749,10 +749,10 @@ For example (this is shown as list, since it will be list for JSON, but will be [1, 5] -``GET /v1/mutable/:storage_index/:share_number`` +``GET /storage/v1/mutable/:storage_index/:share_number`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -Read data from the indicated mutable shares, just like ``GET /v1/immutable/:storage_index`` +Read data from the indicated mutable shares, just like ``GET /storage/v1/immutable/:storage_index`` The ``Range`` header may be used to request exactly one ``bytes`` range, in which case the response code will be 206 (partial content). Interpretation and response behavior is as specified in RFC 7233 ยง 4.1. @@ -764,7 +764,7 @@ The resulting ``Content-Range`` header will be consistent with the returned data If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. -``POST /v1/mutable/:storage_index/:share_number/corrupt`` +``POST /storage/v1/mutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. @@ -778,7 +778,7 @@ Immutable Data 1. Create a bucket for storage index ``AAAAAAAAAAAAAAAA`` to hold two immutable shares, discovering that share ``1`` was already uploaded:: - POST /v1/immutable/AAAAAAAAAAAAAAAA + POST /storage/v1/immutable/AAAAAAAAAAAAAAAA Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: lease-renew-secret efgh X-Tahoe-Authorization: lease-cancel-secret jjkl @@ -791,7 +791,7 @@ Immutable Data #. 
Upload the content for immutable share ``7``:: - PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 + PATCH /storage/v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum Content-Range: bytes 0-15/48 X-Tahoe-Authorization: upload-secret xyzf @@ -799,7 +799,7 @@ Immutable Data 200 OK - PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 + PATCH /storage/v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum Content-Range: bytes 16-31/48 X-Tahoe-Authorization: upload-secret xyzf @@ -807,7 +807,7 @@ Immutable Data 200 OK - PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 + PATCH /storage/v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum Content-Range: bytes 32-47/48 X-Tahoe-Authorization: upload-secret xyzf @@ -817,7 +817,7 @@ Immutable Data #. Download the content of the previously uploaded immutable share ``7``:: - GET /v1/immutable/AAAAAAAAAAAAAAAA?share=7 + GET /storage/v1/immutable/AAAAAAAAAAAAAAAA?share=7 Authorization: Tahoe-LAFS nurl-swissnum Range: bytes=0-47 @@ -826,7 +826,7 @@ Immutable Data #. Renew the lease on all immutable shares in bucket ``AAAAAAAAAAAAAAAA``:: - PUT /v1/lease/AAAAAAAAAAAAAAAA + PUT /storage/v1/lease/AAAAAAAAAAAAAAAA Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: lease-cancel-secret jjkl X-Tahoe-Authorization: lease-renew-secret efgh @@ -841,7 +841,7 @@ The special test vector of size 1 but empty bytes will only pass if there is no existing share, otherwise it will read a byte which won't match `b""`:: - POST /v1/mutable/BBBBBBBBBBBBBBBB/read-test-write + POST /storage/v1/mutable/BBBBBBBBBBBBBBBB/read-test-write Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: write-enabler abcd X-Tahoe-Authorization: lease-cancel-secret efgh @@ -873,7 +873,7 @@ otherwise it will read a byte which won't match `b""`:: #. Safely rewrite the contents of a known version of mutable share number ``3`` (or fail):: - POST /v1/mutable/BBBBBBBBBBBBBBBB/read-test-write + POST /storage/v1/mutable/BBBBBBBBBBBBBBBB/read-test-write Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: write-enabler abcd X-Tahoe-Authorization: lease-cancel-secret efgh @@ -905,14 +905,14 @@ otherwise it will read a byte which won't match `b""`:: #. Download the contents of share number ``3``:: - GET /v1/mutable/BBBBBBBBBBBBBBBB?share=3&offset=0&size=10 + GET /storage/v1/mutable/BBBBBBBBBBBBBBBB?share=3&offset=0&size=10 Authorization: Tahoe-LAFS nurl-swissnum #. Renew the lease on previously uploaded mutable share in slot ``BBBBBBBBBBBBBBBB``:: - PUT /v1/lease/BBBBBBBBBBBBBBBB + PUT /storage/v1/lease/BBBBBBBBBBBBBBBB Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: lease-cancel-secret efgh X-Tahoe-Authorization: lease-renew-secret ijkl diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index a2dc5379f..16d426dda 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -392,7 +392,7 @@ class StorageClientGeneral(object): """ Return the version metadata for the server. """ - url = self._client.relative_url("/v1/version") + url = self._client.relative_url("/storage/v1/version") response = yield self._client.request("GET", url) decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"]) returnValue(decoded_response) @@ -408,7 +408,7 @@ class StorageClientGeneral(object): Otherwise a new lease is added. 
""" url = self._client.relative_url( - "/v1/lease/{}".format(_encode_si(storage_index)) + "/storage/v1/lease/{}".format(_encode_si(storage_index)) ) response = yield self._client.request( "PUT", @@ -457,7 +457,9 @@ def read_share_chunk( always provided by the current callers. """ url = client.relative_url( - "/v1/{}/{}/{}".format(share_type, _encode_si(storage_index), share_number) + "/storage/v1/{}/{}/{}".format( + share_type, _encode_si(storage_index), share_number + ) ) response = yield client.request( "GET", @@ -518,7 +520,7 @@ async def advise_corrupt_share( ): assert isinstance(reason, str) url = client.relative_url( - "/v1/{}/{}/{}/corrupt".format( + "/storage/v1/{}/{}/{}/corrupt".format( share_type, _encode_si(storage_index), share_number ) ) @@ -563,7 +565,9 @@ class StorageClientImmutables(object): Result fires when creating the storage index succeeded, if creating the storage index failed the result will fire with an exception. """ - url = self._client.relative_url("/v1/immutable/" + _encode_si(storage_index)) + url = self._client.relative_url( + "/storage/v1/immutable/" + _encode_si(storage_index) + ) message = {"share-numbers": share_numbers, "allocated-size": allocated_size} response = yield self._client.request( @@ -588,7 +592,9 @@ class StorageClientImmutables(object): ) -> Deferred[None]: """Abort the upload.""" url = self._client.relative_url( - "/v1/immutable/{}/{}/abort".format(_encode_si(storage_index), share_number) + "/storage/v1/immutable/{}/{}/abort".format( + _encode_si(storage_index), share_number + ) ) response = yield self._client.request( "PUT", @@ -620,7 +626,9 @@ class StorageClientImmutables(object): been uploaded. """ url = self._client.relative_url( - "/v1/immutable/{}/{}".format(_encode_si(storage_index), share_number) + "/storage/v1/immutable/{}/{}".format( + _encode_si(storage_index), share_number + ) ) response = yield self._client.request( "PATCH", @@ -668,7 +676,7 @@ class StorageClientImmutables(object): Return the set of shares for a given storage index. """ url = self._client.relative_url( - "/v1/immutable/{}/shares".format(_encode_si(storage_index)) + "/storage/v1/immutable/{}/shares".format(_encode_si(storage_index)) ) response = yield self._client.request( "GET", @@ -774,7 +782,7 @@ class StorageClientMutables: are done and if they are valid the writes are done. """ url = self._client.relative_url( - "/v1/mutable/{}/read-test-write".format(_encode_si(storage_index)) + "/storage/v1/mutable/{}/read-test-write".format(_encode_si(storage_index)) ) message = { "test-write-vectors": { @@ -817,7 +825,7 @@ class StorageClientMutables: List the share numbers for a given storage index. 
""" url = self._client.relative_url( - "/v1/mutable/{}/shares".format(_encode_si(storage_index)) + "/storage/v1/mutable/{}/shares".format(_encode_si(storage_index)) ) response = await self._client.request("GET", url) if response.code == http.OK: diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 68d0740b1..2e9b57b13 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -545,7 +545,7 @@ class HTTPServer(object): ##### Generic APIs ##### - @_authorized_route(_app, set(), "/v1/version", methods=["GET"]) + @_authorized_route(_app, set(), "/storage/v1/version", methods=["GET"]) def version(self, request, authorization): """Return version information.""" return self._send_encoded(request, self._storage_server.get_version()) @@ -555,7 +555,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.LEASE_RENEW, Secrets.LEASE_CANCEL, Secrets.UPLOAD}, - "/v1/immutable/", + "/storage/v1/immutable/", methods=["POST"], ) def allocate_buckets(self, request, authorization, storage_index): @@ -591,7 +591,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.UPLOAD}, - "/v1/immutable///abort", + "/storage/v1/immutable///abort", methods=["PUT"], ) def abort_share_upload(self, request, authorization, storage_index, share_number): @@ -622,7 +622,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.UPLOAD}, - "/v1/immutable//", + "/storage/v1/immutable//", methods=["PATCH"], ) def write_share_data(self, request, authorization, storage_index, share_number): @@ -665,7 +665,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/immutable//shares", + "/storage/v1/immutable//shares", methods=["GET"], ) def list_shares(self, request, authorization, storage_index): @@ -678,7 +678,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/immutable//", + "/storage/v1/immutable//", methods=["GET"], ) def read_share_chunk(self, request, authorization, storage_index, share_number): @@ -694,7 +694,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.LEASE_RENEW, Secrets.LEASE_CANCEL}, - "/v1/lease/", + "/storage/v1/lease/", methods=["PUT"], ) def add_or_renew_lease(self, request, authorization, storage_index): @@ -715,7 +715,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/immutable///corrupt", + "/storage/v1/immutable///corrupt", methods=["POST"], ) def advise_corrupt_share_immutable( @@ -736,7 +736,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.LEASE_RENEW, Secrets.LEASE_CANCEL, Secrets.WRITE_ENABLER}, - "/v1/mutable//read-test-write", + "/storage/v1/mutable//read-test-write", methods=["POST"], ) def mutable_read_test_write(self, request, authorization, storage_index): @@ -771,7 +771,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/mutable//", + "/storage/v1/mutable//", methods=["GET"], ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): @@ -795,7 +795,10 @@ class HTTPServer(object): return read_range(request, read_data, share_length) @_authorized_route( - _app, set(), "/v1/mutable//shares", methods=["GET"] + _app, + set(), + "/storage/v1/mutable//shares", + methods=["GET"], ) def enumerate_mutable_shares(self, request, authorization, storage_index): """List mutable shares for a storage index.""" @@ -805,7 +808,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/mutable///corrupt", + "/storage/v1/mutable///corrupt", methods=["POST"], ) def 
advise_corrupt_share_mutable( diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 419052282..4a912cf6c 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -255,7 +255,7 @@ class TestApp(object): else: return "BAD: {}".format(authorization) - @_authorized_route(_app, set(), "/v1/version", methods=["GET"]) + @_authorized_route(_app, set(), "/storage/v1/version", methods=["GET"]) def bad_version(self, request, authorization): """Return version result that violates the expected schema.""" request.setHeader("content-type", CBOR_MIME_TYPE) @@ -534,7 +534,7 @@ class GenericHTTPAPITests(SyncTestCase): lease_secret = urandom(32) storage_index = urandom(16) url = self.http.client.relative_url( - "/v1/immutable/" + _encode_si(storage_index) + "/storage/v1/immutable/" + _encode_si(storage_index) ) message = {"bad-message": "missing expected keys"} @@ -1418,7 +1418,7 @@ class SharedImmutableMutableTestsMixin: self.http.client.request( "GET", self.http.client.relative_url( - "/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + "/storage/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) ), ) ) @@ -1441,7 +1441,7 @@ class SharedImmutableMutableTestsMixin: self.http.client.request( "GET", self.http.client.relative_url( - "/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + "/storage/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) ), headers=headers, ) From f5b374a7a2ad95232e8cddca3d9d334f4f4b6986 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:56:11 -0400 Subject: [PATCH 21/91] Make sphinx happy. --- docs/proposed/http-storage-node-protocol.rst | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index ec800367c..a44408e6c 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -396,7 +396,7 @@ General ~~~~~~~ ``GET /storage/v1/version`` -!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve information about the version of the storage server. Information is returned as an encoded mapping. @@ -415,7 +415,7 @@ For example:: } ``PUT /storage/v1/lease/:storage_index`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Either renew or create a new lease on the bucket addressed by ``storage_index``. @@ -468,7 +468,7 @@ Writing ~~~~~~~ ``POST /storage/v1/immutable/:storage_index`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Initialize an immutable storage index with some buckets. The buckets may have share data written to them once. @@ -539,7 +539,7 @@ Rejected designs for upload secrets: Randomness means there is no need to have a secret per share, since adding share-specific content to randomness doesn't actually make the secret any better. ``PATCH /storage/v1/immutable/:storage_index/:share_number`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Write data for the indicated share. The share number must belong to the storage index. @@ -580,7 +580,7 @@ Responses: At this point the only thing to do is abort the upload and start from scratch (see below). ``PUT /storage/v1/immutable/:storage_index/:share_number/abort`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! This cancels an *in-progress* upload. @@ -616,7 +616,7 @@ From RFC 7231:: ``POST /storage/v1/immutable/:storage_index/:share_number/corrupt`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. The request body includes an human-meaningful text string with details about the @@ -635,7 +635,7 @@ Reading ~~~~~~~ ``GET /storage/v1/immutable/:storage_index/shares`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a list (semantically, a set) indicating all shares available for the indicated storage index. For example:: @@ -645,7 +645,7 @@ indicated storage index. For example:: An unknown storage index results in an empty list. ``GET /storage/v1/immutable/:storage_index/:share_number`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Read a contiguous sequence of bytes from one share in one bucket. The response body is the raw share data (i.e., ``application/octet-stream``). @@ -686,7 +686,7 @@ Writing ~~~~~~~ ``POST /storage/v1/mutable/:storage_index/read-test-write`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! General purpose read-test-and-write operation for mutable storage indexes. A mutable storage index is also called a "slot" @@ -742,7 +742,7 @@ Reading ~~~~~~~ ``GET /storage/v1/mutable/:storage_index/shares`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a set indicating all shares available for the indicated storage index. For example (this is shown as list, since it will be list for JSON, but will be set for CBOR):: @@ -765,7 +765,7 @@ If the response to a query is an empty range, the ``NO CONTENT`` (204) response ``POST /storage/v1/mutable/:storage_index/:share_number/corrupt`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. Just like the immutable version. From 4a573ede3461510d6f2aa09f78d2791dea8393b9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 11:29:32 -0400 Subject: [PATCH 22/91] Download the actual data we need, instead of relying on bad reading-beyond-the-end semantics. 
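Fetching the share-hash tree and UEB used to over-read by MAX_UEB_SIZE and
count on the server truncating the result at the end of the share. Old
Foolscap servers tolerate that, but reading past the end is exactly what
storage servers < v1.3.0 reject, and the HTTP-based protocol doesn't like
it either. The replacement reads the UEB's fixed-size length prefix first
and then asks for exactly that many bytes. The shape of the two-step read,
as a sketch using the existing ReadBucketProxy helpers:

    offset = self._offsets['uri_extension']
    prefix = yield self._read(offset, self._fieldsize)
    (length,) = struct.unpack(self._fieldstruct, prefix)
    ueb_data = yield self._read(offset + self._fieldsize, length)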
--- src/allmydata/immutable/layout.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 6679fc94c..07b6b8b3b 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -17,8 +17,10 @@ from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE from allmydata.util import mathutil, observer, pipeline, log from allmydata.util.assertutil import precondition +from allmydata.util.deferredutil import async_to_deferred from allmydata.storage.server import si_b2a + class LayoutInvalid(Exception): """ There is something wrong with these bytes so they can't be interpreted as the kind of immutable file that I know how to download.""" @@ -311,8 +313,6 @@ class WriteBucketProxy_v2(WriteBucketProxy): @implementer(IStorageBucketReader) class ReadBucketProxy(object): - MAX_UEB_SIZE = 2000 # actual size is closer to 419, but varies by a few bytes - def __init__(self, rref, server, storage_index): self._rref = rref self._server = server @@ -389,10 +389,15 @@ class ReadBucketProxy(object): self._offsets[field] = offset return self._offsets - def _fetch_sharehashtree_and_ueb(self, offsets): + @async_to_deferred + async def _fetch_sharehashtree_and_ueb(self, offsets): + [ueb_length] = struct.unpack( + await self._read(offsets['share_hashes'], self._fieldsize), + self._fieldstruct + ) sharehashtree_size = offsets['uri_extension'] - offsets['share_hashes'] return self._read(offsets['share_hashes'], - self.MAX_UEB_SIZE+sharehashtree_size) + ueb_length + self._fieldsize +sharehashtree_size) def _parse_sharehashtree_and_ueb(self, data): sharehashtree_size = self._offsets['uri_extension'] - self._offsets['share_hashes'] From 444bc724c54a07ef4e0dddb53706e1e1d16091b3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 16 Sep 2022 10:38:29 -0400 Subject: [PATCH 23/91] A better approach to MAX_UEB_SIZE: just delete the code since it's not used in practice. --- src/allmydata/immutable/layout.py | 59 ++++++------------------------- 1 file changed, 10 insertions(+), 49 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 07b6b8b3b..d552d43c4 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -17,7 +17,6 @@ from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE from allmydata.util import mathutil, observer, pipeline, log from allmydata.util.assertutil import precondition -from allmydata.util.deferredutil import async_to_deferred from allmydata.storage.server import si_b2a @@ -340,11 +339,6 @@ class ReadBucketProxy(object): # TODO: for small shares, read the whole bucket in _start() d = self._fetch_header() d.addCallback(self._parse_offsets) - # XXX The following two callbacks implement a slightly faster/nicer - # way to get the ueb and sharehashtree, but it requires that the - # storage server be >= v1.3.0. 
- # d.addCallback(self._fetch_sharehashtree_and_ueb) - # d.addCallback(self._parse_sharehashtree_and_ueb) def _fail_waiters(f): self._ready.fire(f) def _notify_waiters(result): @@ -389,34 +383,6 @@ class ReadBucketProxy(object): self._offsets[field] = offset return self._offsets - @async_to_deferred - async def _fetch_sharehashtree_and_ueb(self, offsets): - [ueb_length] = struct.unpack( - await self._read(offsets['share_hashes'], self._fieldsize), - self._fieldstruct - ) - sharehashtree_size = offsets['uri_extension'] - offsets['share_hashes'] - return self._read(offsets['share_hashes'], - ueb_length + self._fieldsize +sharehashtree_size) - - def _parse_sharehashtree_and_ueb(self, data): - sharehashtree_size = self._offsets['uri_extension'] - self._offsets['share_hashes'] - if len(data) < sharehashtree_size: - raise LayoutInvalid("share hash tree truncated -- should have at least %d bytes -- not %d" % (sharehashtree_size, len(data))) - if sharehashtree_size % (2+HASH_SIZE) != 0: - raise LayoutInvalid("share hash tree malformed -- should have an even multiple of %d bytes -- not %d" % (2+HASH_SIZE, sharehashtree_size)) - self._share_hashes = [] - for i in range(0, sharehashtree_size, 2+HASH_SIZE): - hashnum = struct.unpack(">H", data[i:i+2])[0] - hashvalue = data[i+2:i+2+HASH_SIZE] - self._share_hashes.append( (hashnum, hashvalue) ) - - i = self._offsets['uri_extension']-self._offsets['share_hashes'] - if len(data) < i+self._fieldsize: - raise LayoutInvalid("not enough bytes to encode URI length -- should be at least %d bytes long, not %d " % (i+self._fieldsize, len(data),)) - length = struct.unpack(self._fieldstruct, data[i:i+self._fieldsize])[0] - self._ueb_data = data[i+self._fieldsize:i+self._fieldsize+length] - def _get_block_data(self, unused, blocknum, blocksize, thisblocksize): offset = self._offsets['data'] + blocknum * blocksize return self._read(offset, thisblocksize) @@ -459,20 +425,18 @@ class ReadBucketProxy(object): else: return defer.succeed([]) - def _get_share_hashes(self, unused=None): - if hasattr(self, '_share_hashes'): - return self._share_hashes - return self._get_share_hashes_the_old_way() - def get_share_hashes(self): d = self._start_if_needed() d.addCallback(self._get_share_hashes) return d - def _get_share_hashes_the_old_way(self): + def _get_share_hashes(self, _ignore): """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the end of the share, so we need to use the offset and - read just that much.""" + read just that much. + + HTTP-based storage protocol also doesn't like reading past the end. + """ offset = self._offsets['share_hashes'] size = self._offsets['uri_extension'] - offset if size % (2+HASH_SIZE) != 0: @@ -490,10 +454,13 @@ class ReadBucketProxy(object): d.addCallback(_unpack_share_hashes) return d - def _get_uri_extension_the_old_way(self, unused=None): + def _get_uri_extension(self, unused=None): """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the end of the share, so we need to fetch the UEB size - and then read just that much.""" + and then read just that much. + + HTTP-based storage protocol also doesn't like reading past the end. 
+ """ offset = self._offsets['uri_extension'] d = self._read(offset, self._fieldsize) def _got_length(data): @@ -510,12 +477,6 @@ class ReadBucketProxy(object): d.addCallback(_got_length) return d - def _get_uri_extension(self, unused=None): - if hasattr(self, '_ueb_data'): - return self._ueb_data - else: - return self._get_uri_extension_the_old_way() - def get_uri_extension(self): d = self._start_if_needed() d.addCallback(self._get_uri_extension) From fb532a71ef4da28c91594e8d05695e267b747137 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 13 Sep 2022 22:43:09 -0600 Subject: [PATCH 24/91] own pid-file checks --- setup.py | 3 ++ src/allmydata/scripts/tahoe_run.py | 36 ++++++++++----- src/allmydata/util/pid.py | 72 ++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 12 deletions(-) create mode 100644 src/allmydata/util/pid.py diff --git a/setup.py b/setup.py index c3ee4eb90..bd16a61ce 100644 --- a/setup.py +++ b/setup.py @@ -138,6 +138,9 @@ install_requires = [ "treq", "cbor2", "pycddl", + + # for pid-file support + "psutil", ] setup_requires = [ diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 51be32ee3..21041f1ab 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -19,6 +19,7 @@ import os, sys from allmydata.scripts.common import BasedirOptions from twisted.scripts import twistd from twisted.python import usage +from twisted.python.filepath import FilePath from twisted.python.reflect import namedAny from twisted.internet.defer import maybeDeferred from twisted.application.service import Service @@ -27,6 +28,11 @@ from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin +from allmydata.util.pid import ( + check_pid_process, + cleanup_pidfile, + ProcessInTheWay, +) from allmydata.storage.crawler import ( MigratePickleFileError, ) @@ -35,28 +41,31 @@ from allmydata.node import ( PrivacyError, ) + def get_pidfile(basedir): """ Returns the path to the PID file. :param basedir: the node's base directory :returns: the path to the PID file """ - return os.path.join(basedir, u"twistd.pid") + return os.path.join(basedir, u"running.process") + def get_pid_from_pidfile(pidfile): """ Tries to read and return the PID stored in the node's PID file - (twistd.pid). + :param pidfile: try to read this PID file :returns: A numeric PID on success, ``None`` if PID file absent or inaccessible, ``-1`` if PID file invalid. """ try: with open(pidfile, "r") as f: - pid = f.read() + data = f.read().strip() except EnvironmentError: return None + pid, _ = data.split() try: pid = int(pid) except ValueError: @@ -64,6 +73,7 @@ def get_pid_from_pidfile(pidfile): return pid + def identify_node_type(basedir): """ :return unicode: None or one of: 'client' or 'introducer'. 
@@ -227,10 +237,8 @@ def run(config, runApp=twistd.runApp): print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - twistd_args = ["--nodaemon", "--rundir", basedir] - if sys.platform != "win32": - pidfile = get_pidfile(basedir) - twistd_args.extend(["--pidfile", pidfile]) + # we turn off Twisted's pid-file to use our own + twistd_args = ["--pidfile", None, "--nodaemon", "--rundir", basedir] twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -246,12 +254,16 @@ def run(config, runApp=twistd.runApp): return 1 twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} - # handle invalid PID file (twistd might not start otherwise) - if sys.platform != "win32" and get_pid_from_pidfile(pidfile) == -1: - print("found invalid PID file in %s - deleting it" % basedir, file=err) - os.remove(pidfile) + # before we try to run, check against our pidfile -- this will + # raise an exception if there appears to be a running process "in + # the way" + pidfile = FilePath(get_pidfile(config['basedir'])) + try: + check_pid_process(pidfile) + except ProcessInTheWay as e: + print("ERROR: {}".format(e)) + return 1 # We always pass --nodaemon so twistd.runApp does not daemonize. - print("running node in %s" % (quoted_basedir,), file=out) runApp(twistd_config) return 0 diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py new file mode 100644 index 000000000..21e30aa87 --- /dev/null +++ b/src/allmydata/util/pid.py @@ -0,0 +1,72 @@ +import os +import psutil + + +class ProcessInTheWay(Exception): + """ + our pidfile points at a running process + """ + + +def check_pid_process(pidfile, find_process=None): + """ + If another instance appears to be running already, raise an + exception. Otherwise, write our PID + start time to the pidfile + and arrange to delete it upon exit. + + :param FilePath pidfile: the file to read/write our PID from. + + :param Callable find_process: None, or a custom way to get a + Process objet (usually for tests) + + :raises ProcessInTheWay: if a running process exists at our PID + """ + find_process = psutil.Process if find_process is None else find_process + # check if we have another instance running already + if pidfile.exists(): + with pidfile.open("r") as f: + content = f.read().decode("utf8").strip() + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + try: + # if any other process is running at that PID, let the + # user decide if this is another magic-older + # instance. Automated programs may use the start-time to + # help decide this (if the PID is merely recycled, the + # start-time won't match). 
+ proc = find_process(pid) + raise ProcessInTheWay( + "A process is already running as PID {}".format(pid) + ) + except psutil.NoSuchProcess: + print( + "'{pidpath}' refers to {pid} that isn't running".format( + pidpath=pidfile.path, + pid=pid, + ) + ) + # nothing is running at that PID so it must be a stale file + pidfile.remove() + + # write our PID + start-time to the pid-file + pid = os.getpid() + starttime = find_process(pid).create_time() + with pidfile.open("w") as f: + f.write("{} {}\n".format(pid, starttime).encode("utf8")) + + +def cleanup_pidfile(pidfile): + """ + Safely remove the given pidfile + """ + + try: + pidfile.remove() + except Exception as e: + print( + "Couldn't remove '{pidfile}': {err}.".format( + pidfile=pidfile.path, + err=e, + ) + ) From 3bfb60c6f426cadc25bb201e6e59165cedd2b490 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 19:57:01 -0600 Subject: [PATCH 25/91] back to context-manager, simplify --- src/allmydata/scripts/tahoe_run.py | 15 +++++++++------ src/allmydata/test/cli/test_run.py | 20 +++++++++++--------- src/allmydata/util/pid.py | 29 +++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 21041f1ab..07f5bf72c 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -30,8 +30,8 @@ from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin from allmydata.util.pid import ( check_pid_process, - cleanup_pidfile, ProcessInTheWay, + InvalidPidFile, ) from allmydata.storage.crawler import ( MigratePickleFileError, @@ -237,8 +237,13 @@ def run(config, runApp=twistd.runApp): print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - # we turn off Twisted's pid-file to use our own - twistd_args = ["--pidfile", None, "--nodaemon", "--rundir", basedir] + twistd_args = [ + # turn off Twisted's pid-file to use our own + "--pidfile", None, + # ensure twistd machinery does not daemonize. 
+ "--nodaemon", + "--rundir", basedir, + ] twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -254,9 +259,7 @@ def run(config, runApp=twistd.runApp): return 1 twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} - # before we try to run, check against our pidfile -- this will - # raise an exception if there appears to be a running process "in - # the way" + # our own pid-style file contains PID and process creation time pidfile = FilePath(get_pidfile(config['basedir'])) try: check_pid_process(pidfile) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 28613e8c1..db01eb440 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -159,7 +159,7 @@ class RunTests(SyncTestCase): """ basedir = FilePath(self.mktemp()).asTextMode() basedir.makedirs() - basedir.child(u"twistd.pid").setContent(b"foo") + basedir.child(u"running.process").setContent(b"foo") basedir.child(u"tahoe-client.tac").setContent(b"") config = RunOptions() @@ -168,17 +168,19 @@ class RunTests(SyncTestCase): config['basedir'] = basedir.path config.twistd_args = [] - runs = [] - result_code = run(config, runApp=runs.append) + class DummyRunner: + runs = [] + _exitSignal = None + + def run(self): + self.runs.append(True) + + result_code = run(config, runner=DummyRunner()) self.assertThat( config.stderr.getvalue(), Contains("found invalid PID file in"), ) self.assertThat( - runs, - HasLength(1), - ) - self.assertThat( - result_code, - Equals(0), + DummyRunner.runs, + Equals([]) ) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 21e30aa87..3b488a2c2 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,5 +1,8 @@ import os import psutil +from contextlib import ( + contextmanager, +) class ProcessInTheWay(Exception): @@ -8,6 +11,13 @@ class ProcessInTheWay(Exception): """ +class InvalidPidFile(Exception): + """ + our pidfile isn't well-formed + """ + + +@contextmanager def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -26,9 +36,16 @@ def check_pid_process(pidfile, find_process=None): if pidfile.exists(): with pidfile.open("r") as f: content = f.read().decode("utf8").strip() - pid, starttime = content.split() - pid = int(pid) - starttime = float(starttime) + try: + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + except ValueError: + raise InvalidPidFile( + "found invalid PID file in {}".format( + pidfile + ) + ) try: # if any other process is running at that PID, let the # user decide if this is another magic-older @@ -55,11 +72,7 @@ def check_pid_process(pidfile, find_process=None): with pidfile.open("w") as f: f.write("{} {}\n".format(pid, starttime).encode("utf8")) - -def cleanup_pidfile(pidfile): - """ - Safely remove the given pidfile - """ + yield # setup completed, await cleanup try: pidfile.remove() From cad162bb8fb2d961c74f457be6e4495b00f0aeed Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 19:59:18 -0600 Subject: [PATCH 26/91] should have pid-file on windows too, now --- src/allmydata/test/cli/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index db01eb440..902e4011a 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -151,7 +151,7 @@ 
class RunTests(SyncTestCase): """ Tests for ``run``. """ - @skipIf(platform.isWindows(), "There are no PID files on Windows.") + def test_non_numeric_pid(self): """ If the pidfile exists but does not contain a numeric value, a complaint to From 0e0ebf6687280d0be5ae6a536a4f9d48958d03b7 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 20:06:32 -0600 Subject: [PATCH 27/91] more testing --- src/allmydata/test/cli/test_run.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 902e4011a..ecc81fe3f 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -20,6 +20,9 @@ from testtools import ( skipIf, ) +from hypothesis.strategies import text +from hypothesis import given + from testtools.matchers import ( Contains, Equals, @@ -44,6 +47,10 @@ from ...scripts.tahoe_run import ( RunOptions, run, ) +from ...util.pid import ( + check_pid_process, + InvalidPidFile, +) from ...scripts.runner import ( parse_options @@ -180,7 +187,18 @@ class RunTests(SyncTestCase): config.stderr.getvalue(), Contains("found invalid PID file in"), ) + # because the pidfile is invalid we shouldn't get to the + # .run() call itself. self.assertThat( DummyRunner.runs, Equals([]) ) + + @given(text()) + def test_pidfile_contents(self, content): + pidfile = FilePath("pidfile") + pidfile.setContent(content.encode("utf8")) + + with self.assertRaises(InvalidPidFile): + with check_pid_process(pidfile): + pass From e6adfc7726cc3e081d18b712e573ef265e49c3ca Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 20:22:07 -0600 Subject: [PATCH 28/91] news --- newsfragments/3926.incompat | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 newsfragments/3926.incompat diff --git a/newsfragments/3926.incompat b/newsfragments/3926.incompat new file mode 100644 index 000000000..3f58b4ba8 --- /dev/null +++ b/newsfragments/3926.incompat @@ -0,0 +1,10 @@ +Record both the PID and the process creation-time + +A new kind of pidfile in `running.process` records both +the PID and the creation-time of the process. This facilitates +automatic discovery of a "stale" pidfile that points to a +currently-running process. If the recorded creation-time matches +the creation-time of the running process, then it is a still-running +`tahoe run` proecss. Otherwise, the file is stale. + +The `twistd.pid` file is no longer present. 
\ No newline at end of file From 6048d1d9a99e5f88cd423a9524bede823277709f Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 21:13:30 -0600 Subject: [PATCH 29/91] in case hypothesis finds the magic --- src/allmydata/test/cli/test_run.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index ecc81fe3f..7bf87eea9 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -12,6 +12,7 @@ from future.utils import PY2 if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import re from six.moves import ( StringIO, ) @@ -21,7 +22,7 @@ from testtools import ( ) from hypothesis.strategies import text -from hypothesis import given +from hypothesis import given, assume from testtools.matchers import ( Contains, @@ -194,8 +195,11 @@ class RunTests(SyncTestCase): Equals([]) ) + good_file_content_re = re.compile(r"\w[0-9]*\w[0-9]*\w") + @given(text()) def test_pidfile_contents(self, content): + assume(not self.good_file_content_re.match(content)) pidfile = FilePath("pidfile") pidfile.setContent(content.encode("utf8")) From 642b604753dd9b9af2c740e04e65e58bbae00299 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 21:51:56 -0600 Subject: [PATCH 30/91] use stdin-closing for pidfile cleanup too --- src/allmydata/scripts/tahoe_run.py | 1 + src/allmydata/test/cli/test_run.py | 12 +++--------- src/allmydata/util/pid.py | 17 +++++++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 07f5bf72c..20d5c2bf1 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -30,6 +30,7 @@ from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin from allmydata.util.pid import ( check_pid_process, + cleanup_pidfile, ProcessInTheWay, InvalidPidFile, ) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 7bf87eea9..71085fddd 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -176,14 +176,8 @@ class RunTests(SyncTestCase): config['basedir'] = basedir.path config.twistd_args = [] - class DummyRunner: - runs = [] - _exitSignal = None - - def run(self): - self.runs.append(True) - - result_code = run(config, runner=DummyRunner()) + runs = [] + result_code = run(config, runApp=runs.append) self.assertThat( config.stderr.getvalue(), Contains("found invalid PID file in"), @@ -191,7 +185,7 @@ class RunTests(SyncTestCase): # because the pidfile is invalid we shouldn't get to the # .run() call itself. 
self.assertThat( - DummyRunner.runs, + runs, Equals([]) ) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 3b488a2c2..3ab955cb3 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,8 +1,5 @@ import os import psutil -from contextlib import ( - contextmanager, -) class ProcessInTheWay(Exception): @@ -17,7 +14,12 @@ class InvalidPidFile(Exception): """ -@contextmanager +class CannotRemovePidFile(Exception): + """ + something went wrong removing the pidfile + """ + + def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -72,12 +74,15 @@ def check_pid_process(pidfile, find_process=None): with pidfile.open("w") as f: f.write("{} {}\n".format(pid, starttime).encode("utf8")) - yield # setup completed, await cleanup +def cleanup_pidfile(pidfile): + """ + Safely clean up a PID-file + """ try: pidfile.remove() except Exception as e: - print( + raise CannotRemovePidFile( "Couldn't remove '{pidfile}': {err}.".format( pidfile=pidfile.path, err=e, From 82c72ddede1dbbe97365877186af27928a996c0b Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 21:58:20 -0600 Subject: [PATCH 31/91] cleanup --- src/allmydata/test/cli/test_run.py | 14 ++------------ src/allmydata/util/pid.py | 4 ++-- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 71085fddd..ae869e475 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -17,22 +17,14 @@ from six.moves import ( StringIO, ) -from testtools import ( - skipIf, -) - from hypothesis.strategies import text from hypothesis import given, assume from testtools.matchers import ( Contains, Equals, - HasLength, ) -from twisted.python.runtime import ( - platform, -) from twisted.python.filepath import ( FilePath, ) @@ -184,10 +176,8 @@ class RunTests(SyncTestCase): ) # because the pidfile is invalid we shouldn't get to the # .run() call itself. - self.assertThat( - runs, - Equals([]) - ) + self.assertThat(runs, Equals([])) + self.assertThat(result_code, Equals(1)) good_file_content_re = re.compile(r"\w[0-9]*\w[0-9]*\w") diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 3ab955cb3..ff8129bbc 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -50,11 +50,11 @@ def check_pid_process(pidfile, find_process=None): ) try: # if any other process is running at that PID, let the - # user decide if this is another magic-older + # user decide if this is another legitimate # instance. Automated programs may use the start-time to # help decide this (if the PID is merely recycled, the # start-time won't match). 
- proc = find_process(pid) + find_process(pid) raise ProcessInTheWay( "A process is already running as PID {}".format(pid) ) From 228bbbc2fe791b83af0d495df44882a63456b59f Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 22:39:59 -0600 Subject: [PATCH 32/91] new pid-file --- src/allmydata/test/cli_node_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli_node_api.py b/src/allmydata/test/cli_node_api.py index 410796be2..c324d5565 100644 --- a/src/allmydata/test/cli_node_api.py +++ b/src/allmydata/test/cli_node_api.py @@ -134,7 +134,7 @@ class CLINodeAPI(object): @property def twistd_pid_file(self): - return self.basedir.child(u"twistd.pid") + return self.basedir.child(u"running.process") @property def node_url_file(self): From 114d5e1ed8582fa130953227eced0528862ca381 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 23:08:46 -0600 Subject: [PATCH 33/91] pidfile on windows now --- src/allmydata/scripts/tahoe_run.py | 6 +++-- src/allmydata/test/test_runner.py | 36 ++++++++++++------------------ 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 20d5c2bf1..72b8e3eca 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -239,12 +239,14 @@ def run(config, runApp=twistd.runApp): return 1 twistd_args = [ - # turn off Twisted's pid-file to use our own - "--pidfile", None, # ensure twistd machinery does not daemonize. "--nodaemon", "--rundir", basedir, ] + if sys.platform != "win32": + # turn off Twisted's pid-file to use our own -- but only on + # windows, because twistd doesn't know about pidfiles there + twistd_args.extend(["--pidfile", None]) twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 3eb6b8a34..9b6357f46 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -418,9 +418,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): tahoe.active() - # We don't keep track of PIDs in files on Windows. - if not platform.isWindows(): - self.assertTrue(tahoe.twistd_pid_file.exists()) + self.assertTrue(tahoe.twistd_pid_file.exists()) self.assertTrue(tahoe.node_url_file.exists()) # rm this so we can detect when the second incarnation is ready @@ -493,9 +491,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): # change on restart storage_furl = fileutil.read(tahoe.storage_furl_file.path) - # We don't keep track of PIDs in files on Windows. - if not platform.isWindows(): - self.assertTrue(tahoe.twistd_pid_file.exists()) + self.assertTrue(tahoe.twistd_pid_file.exists()) # rm this so we can detect when the second incarnation is ready tahoe.node_url_file.remove() @@ -513,21 +509,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): fileutil.read(tahoe.storage_furl_file.path), ) - if not platform.isWindows(): - self.assertTrue( - tahoe.twistd_pid_file.exists(), - "PID file ({}) didn't exist when we expected it to. " - "These exist: {}".format( - tahoe.twistd_pid_file, - tahoe.twistd_pid_file.parent().listdir(), - ), - ) + self.assertTrue( + tahoe.twistd_pid_file.exists(), + "PID file ({}) didn't exist when we expected it to. 
" + "These exist: {}".format( + tahoe.twistd_pid_file, + tahoe.twistd_pid_file.parent().listdir(), + ), + ) yield tahoe.stop_and_wait() - if not platform.isWindows(): - # twistd.pid should be gone by now. - self.assertFalse(tahoe.twistd_pid_file.exists()) - + # twistd.pid should be gone by now. + self.assertFalse(tahoe.twistd_pid_file.exists()) def _remove(self, res, file): fileutil.remove(file) @@ -610,9 +603,8 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): ), ) - if not platform.isWindows(): - # It should not be running. - self.assertFalse(tahoe.twistd_pid_file.exists()) + # It should not be running. + self.assertFalse(tahoe.twistd_pid_file.exists()) # Wait for the operation to *complete*. If we got this far it's # because we got the expected message so we can expect the "tahoe ..." From aef2e96139fc0afc610736b181e218dce2aa9b79 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 17 Sep 2022 16:28:25 -0600 Subject: [PATCH 34/91] refactor: dispatch with our reactor, pass to tahoe_run --- src/allmydata/scripts/runner.py | 12 ++++-------- src/allmydata/scripts/tahoe_run.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index a0d8a752b..756c26f2c 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -47,11 +47,6 @@ if _default_nodedir: NODEDIR_HELP += " [default for most commands: " + quote_local_unicode_path(_default_nodedir) + "]" -# XXX all this 'dispatch' stuff needs to be unified + fixed up -_control_node_dispatch = { - "run": tahoe_run.run, -} - process_control_commands = [ ("run", None, tahoe_run.RunOptions, "run a node without daemonizing"), ] # type: SubCommands @@ -195,6 +190,7 @@ def parse_or_exit(config, argv, stdout, stderr): return config def dispatch(config, + reactor, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): command = config.subCommand so = config.subOptions @@ -206,8 +202,8 @@ def dispatch(config, if command in create_dispatch: f = create_dispatch[command] - elif command in _control_node_dispatch: - f = _control_node_dispatch[command] + elif command == "run": + f = lambda config: tahoe_run.run(reactor, config) elif command in debug.dispatch: f = debug.dispatch[command] elif command in admin.dispatch: @@ -361,7 +357,7 @@ def _run_with_reactor(reactor, config, argv, stdout, stderr): stderr, ) d.addCallback(_maybe_enable_eliot_logging, reactor) - d.addCallback(dispatch, stdout=stdout, stderr=stderr) + d.addCallback(dispatch, reactor, stdout=stdout, stderr=stderr) def _show_exception(f): # when task.react() notices a non-SystemExit exception, it does # log.err() with the failure and then exits with rc=1. We want this diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 72b8e3eca..dd4561a4b 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -217,7 +217,7 @@ class DaemonizeTahoeNodePlugin(object): return DaemonizeTheRealService(self.nodetype, self.basedir, so) -def run(config, runApp=twistd.runApp): +def run(reactor, config, runApp=twistd.runApp): """ Runs a Tahoe-LAFS node in the foreground. 
From 8b2cb79070edabd20fb9bdbb41de51458788e50a Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 17 Sep 2022 16:29:03 -0600 Subject: [PATCH 35/91] cleanup via reactor --- src/allmydata/scripts/tahoe_run.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index dd4561a4b..a5b833233 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -269,6 +269,11 @@ def run(reactor, config, runApp=twistd.runApp): except ProcessInTheWay as e: print("ERROR: {}".format(e)) return 1 + else: + reactor.addSystemEventTrigger( + "during", "shutdown", + lambda: cleanup_pidfile(pidfile) + ) # We always pass --nodaemon so twistd.runApp does not daemonize. runApp(twistd_config) From 254a994eb53035b70a653b47b2951d6159634a23 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 17 Sep 2022 16:41:17 -0600 Subject: [PATCH 36/91] flake8 --- src/allmydata/scripts/tahoe_run.py | 2 +- src/allmydata/test/test_runner.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index a5b833233..7722fef51 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -266,7 +266,7 @@ def run(reactor, config, runApp=twistd.runApp): pidfile = FilePath(get_pidfile(config['basedir'])) try: check_pid_process(pidfile) - except ProcessInTheWay as e: + except (ProcessInTheWay, InvalidPidFile) as e: print("ERROR: {}".format(e)) return 1 else: diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 9b6357f46..14d0dfb7f 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -47,9 +47,6 @@ from twisted.internet.defer import ( DeferredList, ) from twisted.python.filepath import FilePath -from twisted.python.runtime import ( - platform, -) from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv from allmydata.test import common_util From fe80126e3fcffe56c171188c7cd5847f19bf6f7b Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 18 Sep 2022 22:39:25 -0600 Subject: [PATCH 37/91] fixups --- src/allmydata/scripts/tahoe_run.py | 2 +- src/allmydata/test/cli/test_run.py | 4 +++- src/allmydata/test/common_util.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 7722fef51..6dfa726a3 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -267,7 +267,7 @@ def run(reactor, config, runApp=twistd.runApp): try: check_pid_process(pidfile) except (ProcessInTheWay, InvalidPidFile) as e: - print("ERROR: {}".format(e)) + print("ERROR: {}".format(e), file=err) return 1 else: reactor.addSystemEventTrigger( diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index ae869e475..6358b70dd 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -168,8 +168,10 @@ class RunTests(SyncTestCase): config['basedir'] = basedir.path config.twistd_args = [] + from twisted.internet import reactor + runs = [] - result_code = run(config, runApp=runs.append) + result_code = run(reactor, config, runApp=runs.append) self.assertThat( config.stderr.getvalue(), Contains("found invalid PID file in"), diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index e63c3eef8..b6d352ab1 100644 --- a/src/allmydata/test/common_util.py +++ 
b/src/allmydata/test/common_util.py @@ -145,6 +145,7 @@ def run_cli_native(verb, *args, **kwargs): ) d.addCallback( runner.dispatch, + reactor, stdin=stdin, stdout=stdout, stderr=stderr, From ef0b2aca1769dbdf11c5eb50b66c186d2ee9e22f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 19 Sep 2022 10:12:11 -0400 Subject: [PATCH 38/91] Adjust NURL spec to new decisions. --- docs/specifications/url.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 31fb05fad..1ce3b2a7f 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -47,27 +47,27 @@ This can be considered to expand to "**N**\ ew URLs" or "Authe\ **N**\ ticating The anticipated use for a **NURL** will still be to establish a TLS connection to a peer. The protocol run over that TLS connection could be Foolscap though it is more likely to be an HTTP-based protocol (such as GBS). +Unlike fURLs, only a single net-loc is included, for consistency with other forms of URLs. +As a result, multiple NURLs may be available for a single server. + Syntax ------ The EBNF for a NURL is as follows:: - nurl = scheme, hash, "@", net-loc-list, "/", swiss-number, [ version1 ] - - scheme = "pb://" + nurl = tcp-nurl | tor-nurl | i2p-nurl + tcp-nurl = "pb://", hash, "@", tcp-loc, "/", swiss-number, [ version1 ] + tor-nurl = "pb+tor://", hash, "@", tcp-loc, "/", swiss-number, [ version1 ] + i2p-nurl = "pb+i2p://", hash, "@", i2p-loc, "/", swiss-number, [ version1 ] hash = unreserved - net-loc-list = net-loc, [ { ",", net-loc } ] - net-loc = tcp-loc | tor-loc | i2p-loc - - tcp-loc = [ "tcp:" ], hostname, [ ":" port ] - tor-loc = "tor:", hostname, [ ":" port ] - i2p-loc = "i2p:", i2p-addr, [ ":" port ] - - i2p-addr = { unreserved }, ".i2p" + tcp-loc = hostname, [ ":" port ] hostname = domain | IPv4address | IPv6address + i2p-loc = i2p-addr, [ ":" port ] + i2p-addr = { unreserved }, ".i2p" + swiss-number = segment version1 = "#v=1" From 4b2725df006ae172f267072a8bcb222b6be6aad9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 20 Sep 2022 10:09:43 -0400 Subject: [PATCH 39/91] Try to prevent leaking timeouts. --- src/allmydata/protocol_switch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 89570436c..a17f3055c 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -151,6 +151,10 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): 30, self.transport.abortConnection ) + def connectionLost(self, reason): + if self._timeout.active(): + self._timeout.cancel() + def dataReceived(self, data: bytes) -> None: """Handle incoming data. 
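To make the revised grammar concrete, here is what a NURL with a single net-loc looks like. The hash and swiss-number below are made-up placeholders (not real capability material), and Tahoe-LAFS does not necessarily parse NURLs this way; the stdlib ``urllib.parse`` just happens to decompose the pieces the grammar names::

    from urllib.parse import urlparse

    # placeholder values: a real hash commits to the server's TLS
    # certificate and a real swiss-number is an unguessable secret
    nurl = "pb://ceoykxnmq4j6zvzm7sn2sisfyxbxdsdq@example.com:8042/mfwcylsrhlddyuedcgfdmrrvny#v=1"

    parsed = urlparse(nurl)
    assert parsed.scheme == "pb"              # "pb+tor" / "pb+i2p" for the other forms
    assert parsed.username == "ceoykxnmq4j6zvzm7sn2sisfyxbxdsdq"  # the hash
    assert parsed.hostname == "example.com"   # exactly one net-loc per NURL
    assert parsed.port == 8042
    assert parsed.path == "/mfwcylsrhlddyuedcgfdmrrvny"           # the swiss-number
    assert parsed.fragment == "v=1"           # the optional version marker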
From 81c8e1c57b8b926ebb3a396f653d7149bd4f6577 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:24:02 -0600 Subject: [PATCH 40/91] windows is special --- src/allmydata/test/test_runner.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 14d0dfb7f..5d8143558 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -42,6 +42,7 @@ from twisted.trial import unittest from twisted.internet import reactor from twisted.python import usage +from twisted.python.runtime import platform from twisted.internet.defer import ( inlineCallbacks, DeferredList, @@ -516,8 +517,12 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): ) yield tahoe.stop_and_wait() - # twistd.pid should be gone by now. - self.assertFalse(tahoe.twistd_pid_file.exists()) + # twistd.pid should be gone by now -- except on Windows, where + # killing a subprocess immediately exits with no chance for + # any shutdown code (that is, no Twisted shutdown hooks can + # run). + if not platform.isWindows(): + self.assertFalse(tahoe.twistd_pid_file.exists()) def _remove(self, res, file): fileutil.remove(file) From 6db1476dacc99c00a12d88b1c6af6a8aa76f3404 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:44:21 -0600 Subject: [PATCH 41/91] comment typo --- src/allmydata/scripts/tahoe_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 6dfa726a3..721ced376 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -244,7 +244,7 @@ def run(reactor, config, runApp=twistd.runApp): "--rundir", basedir, ] if sys.platform != "win32": - # turn off Twisted's pid-file to use our own -- but only on + # turn off Twisted's pid-file to use our own -- but not on # windows, because twistd doesn't know about pidfiles there twistd_args.extend(["--pidfile", None]) twistd_args.extend(config.twistd_args) From 0eeb11c9cd45598fbe2e5bdccb4f9cf50fe222f3 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:44:51 -0600 Subject: [PATCH 42/91] after shutdown --- src/allmydata/scripts/tahoe_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 721ced376..40c4a6612 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -271,7 +271,7 @@ def run(reactor, config, runApp=twistd.runApp): return 1 else: reactor.addSystemEventTrigger( - "during", "shutdown", + "after", "shutdown", lambda: cleanup_pidfile(pidfile) ) From 77bc83d341794afbd0c6884fb5e0e914dbe90632 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:45:19 -0600 Subject: [PATCH 43/91] incorrectly removed --- src/allmydata/scripts/tahoe_run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 40c4a6612..eb4bb0b66 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -276,5 +276,6 @@ def run(reactor, config, runApp=twistd.runApp): ) # We always pass --nodaemon so twistd.runApp does not daemonize. 
+ print("running node in %s" % (quoted_basedir,), file=out) runApp(twistd_config) return 0 From 1f29cc9c29e42a472ce893259f5bdbf2a31c00e0 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:50:46 -0600 Subject: [PATCH 44/91] windows special --- src/allmydata/test/test_runner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 5d8143558..cf6e9f3b5 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -605,8 +605,10 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): ), ) - # It should not be running. - self.assertFalse(tahoe.twistd_pid_file.exists()) + # It should not be running (but windows shutdown can't run + # code so the PID file still exists there). + if not platform.isWindows(): + self.assertFalse(tahoe.twistd_pid_file.exists()) # Wait for the operation to *complete*. If we got this far it's # because we got the expected message so we can expect the "tahoe ..." From 5973196931d2143f68a34d9b01857339582ec5c0 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:00:27 -0600 Subject: [PATCH 45/91] refactor: use filelock and test it --- setup.py | 1 + src/allmydata/test/test_runner.py | 47 ++++++++++++ src/allmydata/util/pid.py | 117 ++++++++++++++++++------------ 3 files changed, 119 insertions(+), 46 deletions(-) diff --git a/setup.py b/setup.py index bd16a61ce..d99831347 100644 --- a/setup.py +++ b/setup.py @@ -141,6 +141,7 @@ install_requires = [ # for pid-file support "psutil", + "filelock", ] setup_requires = [ diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index cf6e9f3b5..5a8311649 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -50,6 +50,11 @@ from twisted.internet.defer import ( from twisted.python.filepath import FilePath from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv +from allmydata.util.pid import ( + check_pid_process, + _pidfile_to_lockpath, + ProcessInTheWay, +) from allmydata.test import common_util import allmydata from allmydata.scripts.runner import ( @@ -617,3 +622,45 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): # What's left is a perfect indicator that the process has exited and # we won't get blamed for leaving the reactor dirty. yield client_running + + +class PidFileLocking(SyncTestCase): + """ + Direct tests for allmydata.util.pid functions + """ + + def test_locking(self): + """ + Fail to create a pidfile if another process has the lock already. + """ + # this can't just be "our" process because the locking library + # allows the same process to acquire a lock multiple times. + pidfile = FilePath("foo") + lockfile = _pidfile_to_lockpath(pidfile) + + with open("code.py", "w") as f: + f.write( + "\n".join([ + "import filelock, time", + "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), + " print('.', flush=True)", + " time.sleep(5)", + ]) + ) + proc = Popen( + [sys.executable, "code.py"], + stdout=PIPE, + stderr=PIPE, + start_new_session=True, + ) + # make sure our subprocess has had time to acquire the lock + # for sure (from the "." 
it prints) + self.assertThat( + proc.stdout.read(1), + Equals(b".") + ) + + # we should not be able to acuire this corresponding lock as well + with self.assertRaises(ProcessInTheWay): + check_pid_process(pidfile) + proc.terminate() diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index ff8129bbc..e256615d6 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,6 +1,10 @@ import os import psutil +# the docs are a little misleading, but this is either WindowsFileLock +# or UnixFileLock depending upon the platform we're currently on +from filelock import FileLock, Timeout + class ProcessInTheWay(Exception): """ @@ -20,6 +24,14 @@ class CannotRemovePidFile(Exception): """ +def _pidfile_to_lockpath(pidfile): + """ + internal helper. + :returns FilePath: a path to use for file-locking the given pidfile + """ + return pidfile.sibling("{}.lock".format(pidfile.basename())) + + def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -34,57 +46,70 @@ def check_pid_process(pidfile, find_process=None): :raises ProcessInTheWay: if a running process exists at our PID """ find_process = psutil.Process if find_process is None else find_process - # check if we have another instance running already - if pidfile.exists(): - with pidfile.open("r") as f: - content = f.read().decode("utf8").strip() - try: - pid, starttime = content.split() - pid = int(pid) - starttime = float(starttime) - except ValueError: - raise InvalidPidFile( - "found invalid PID file in {}".format( - pidfile - ) - ) - try: - # if any other process is running at that PID, let the - # user decide if this is another legitimate - # instance. Automated programs may use the start-time to - # help decide this (if the PID is merely recycled, the - # start-time won't match). - find_process(pid) - raise ProcessInTheWay( - "A process is already running as PID {}".format(pid) - ) - except psutil.NoSuchProcess: - print( - "'{pidpath}' refers to {pid} that isn't running".format( - pidpath=pidfile.path, - pid=pid, - ) - ) - # nothing is running at that PID so it must be a stale file - pidfile.remove() + lock_path = _pidfile_to_lockpath(pidfile) - # write our PID + start-time to the pid-file - pid = os.getpid() - starttime = find_process(pid).create_time() - with pidfile.open("w") as f: - f.write("{} {}\n".format(pid, starttime).encode("utf8")) + try: + # a short timeout is fine, this lock should only be active + # while someone is reading or deleting the pidfile .. and + # facilitates testing the locking itself. + with FileLock(lock_path.path, timeout=2): + # check if we have another instance running already + if pidfile.exists(): + with pidfile.open("r") as f: + content = f.read().decode("utf8").strip() + try: + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + except ValueError: + raise InvalidPidFile( + "found invalid PID file in {}".format( + pidfile + ) + ) + try: + # if any other process is running at that PID, let the + # user decide if this is another legitimate + # instance. Automated programs may use the start-time to + # help decide this (if the PID is merely recycled, the + # start-time won't match). 
+ find_process(pid) + raise ProcessInTheWay( + "A process is already running as PID {}".format(pid) + ) + except psutil.NoSuchProcess: + print( + "'{pidpath}' refers to {pid} that isn't running".format( + pidpath=pidfile.path, + pid=pid, + ) + ) + # nothing is running at that PID so it must be a stale file + pidfile.remove() + + # write our PID + start-time to the pid-file + pid = os.getpid() + starttime = find_process(pid).create_time() + with pidfile.open("w") as f: + f.write("{} {}\n".format(pid, starttime).encode("utf8")) + except Timeout: + raise ProcessInTheWay( + "Another process is still locking {}".format(pidfile.path) + ) def cleanup_pidfile(pidfile): """ Safely clean up a PID-file """ - try: - pidfile.remove() - except Exception as e: - raise CannotRemovePidFile( - "Couldn't remove '{pidfile}': {err}.".format( - pidfile=pidfile.path, - err=e, + lock_path = _pidfile_to_lockpath(pidfile) + with FileLock(lock_path.path): + try: + pidfile.remove() + except Exception as e: + raise CannotRemovePidFile( + "Couldn't remove '{pidfile}': {err}.".format( + pidfile=pidfile.path, + err=e, + ) ) - ) From ea39e4ca6902daad125596f5e1e2b81989e9cb6b Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:01:28 -0600 Subject: [PATCH 46/91] docstring --- src/allmydata/test/cli/test_run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 6358b70dd..551164d3c 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -185,6 +185,9 @@ class RunTests(SyncTestCase): @given(text()) def test_pidfile_contents(self, content): + """ + invalid contents for a pidfile raise errors + """ assume(not self.good_file_content_re.match(content)) pidfile = FilePath("pidfile") pidfile.setContent(content.encode("utf8")) From 56775dde192c90b48fa85cfcb4a2651f5b264791 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:05:30 -0600 Subject: [PATCH 47/91] refactor: parsing in a function --- src/allmydata/scripts/tahoe_run.py | 6 +++--- src/allmydata/util/pid.py | 34 +++++++++++++++++++----------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index eb4bb0b66..4d17492d4 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -29,6 +29,7 @@ from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_pat from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin from allmydata.util.pid import ( + parse_pidfile, check_pid_process, cleanup_pidfile, ProcessInTheWay, @@ -66,10 +67,9 @@ def get_pid_from_pidfile(pidfile): except EnvironmentError: return None - pid, _ = data.split() try: - pid = int(pid) - except ValueError: + pid, _ = parse_pidfile(pidfile) + except InvalidPidFile: return -1 return pid diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index e256615d6..1cb2cc45a 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -32,6 +32,27 @@ def _pidfile_to_lockpath(pidfile): return pidfile.sibling("{}.lock".format(pidfile.basename())) +def parse_pidfile(pidfile): + """ + :param FilePath pidfile: + :returns tuple: 2-tuple of pid, creation-time as int, float + :raises InvalidPidFile: on error + """ + with pidfile.open("r") as f: + content = f.read().decode("utf8").strip() + try: + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + except 
ValueError: + raise InvalidPidFile( + "found invalid PID file in {}".format( + pidfile + ) + ) + return pid, startime + + def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -55,18 +76,7 @@ def check_pid_process(pidfile, find_process=None): with FileLock(lock_path.path, timeout=2): # check if we have another instance running already if pidfile.exists(): - with pidfile.open("r") as f: - content = f.read().decode("utf8").strip() - try: - pid, starttime = content.split() - pid = int(pid) - starttime = float(starttime) - except ValueError: - raise InvalidPidFile( - "found invalid PID file in {}".format( - pidfile - ) - ) + pid, starttime = parse_pidfile(pidfile) try: # if any other process is running at that PID, let the # user decide if this is another legitimate From 390c8c52da3f9f0ea21ad789caf7c8f6ae9bbc74 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:23:30 -0600 Subject: [PATCH 48/91] formatting + typo --- newsfragments/3926.incompat | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/newsfragments/3926.incompat b/newsfragments/3926.incompat index 3f58b4ba8..674ad289c 100644 --- a/newsfragments/3926.incompat +++ b/newsfragments/3926.incompat @@ -1,10 +1,10 @@ -Record both the PID and the process creation-time +Record both the PID and the process creation-time: -A new kind of pidfile in `running.process` records both +a new kind of pidfile in `running.process` records both the PID and the creation-time of the process. This facilitates automatic discovery of a "stale" pidfile that points to a currently-running process. If the recorded creation-time matches the creation-time of the running process, then it is a still-running -`tahoe run` proecss. Otherwise, the file is stale. +`tahoe run` process. Otherwise, the file is stale. The `twistd.pid` file is no longer present. \ No newline at end of file From e111694b3e33e54db974acbd057d74380c6de4ce Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:28:09 -0600 Subject: [PATCH 49/91] get rid of find_process= --- src/allmydata/util/pid.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 1cb2cc45a..d681d819e 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -53,7 +53,7 @@ def parse_pidfile(pidfile): return pid, startime -def check_pid_process(pidfile, find_process=None): +def check_pid_process(pidfile): """ If another instance appears to be running already, raise an exception. Otherwise, write our PID + start time to the pidfile @@ -61,12 +61,8 @@ def check_pid_process(pidfile, find_process=None): :param FilePath pidfile: the file to read/write our PID from. - :param Callable find_process: None, or a custom way to get a - Process objet (usually for tests) - :raises ProcessInTheWay: if a running process exists at our PID """ - find_process = psutil.Process if find_process is None else find_process lock_path = _pidfile_to_lockpath(pidfile) try: @@ -83,7 +79,7 @@ def check_pid_process(pidfile, find_process=None): # instance. Automated programs may use the start-time to # help decide this (if the PID is merely recycled, the # start-time won't match). 
-                    find_process(pid)
+                    psutil.Process(pid)
                     raise ProcessInTheWay(
                         "A process is already running as PID {}".format(pid)
                     )
@@ -98,8 +94,7 @@ def check_pid_process(pidfile):
                 pidfile.remove()
 
             # write our PID + start-time to the pid-file
-            pid = os.getpid()
-            starttime = find_process(pid).create_time()
+            starttime = psutil.Process().create_time()
             with pidfile.open("w") as f:
                 f.write("{} {}\n".format(pid, starttime).encode("utf8"))

From 0a09d23525fc4be928fa288c1301327d6eaccf32 Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 21 Sep 2022 19:29:40 -0600
Subject: [PATCH 50/91] more docstring

---
 src/allmydata/util/pid.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py
index d681d819e..f965d72ab 100644
--- a/src/allmydata/util/pid.py
+++ b/src/allmydata/util/pid.py
@@ -105,7 +105,8 @@ def check_pid_process(pidfile):
 
 def cleanup_pidfile(pidfile):
     """
-    Safely clean up a PID-file
+    Remove the pidfile specified (respecting locks). If anything at
+    all goes wrong, `CannotRemovePidFile` is raised.
     """
     lock_path = _pidfile_to_lockpath(pidfile)
     with FileLock(lock_path.path):

From 6eebbda7c6c06732932c50a96ba1a5315c9d35f4 Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 21 Sep 2022 20:07:29 -0600
Subject: [PATCH 51/91] documentation, example code

---
 docs/check_running.py | 47 +++++++++++++++++++++++++++++++++++++++
 docs/running.rst      | 29 ++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 docs/check_running.py

diff --git a/docs/check_running.py b/docs/check_running.py
new file mode 100644
index 000000000..ecc55da34
--- /dev/null
+++ b/docs/check_running.py
@@ -0,0 +1,47 @@
+
+import psutil
+import filelock
+
+
+def can_spawn_tahoe(pidfile):
+    """
+    Determine if we can spawn a Tahoe-LAFS for the given pidfile. That
+    pidfile may be deleted if it is stale.
+
+    :param pathlib.Path pidfile: the file to check, that is the Path
+        to "running.process" in a Tahoe-LAFS configuration directory
+
+    :returns bool: True if we can spawn `tahoe run` here
+    """
+    lockpath = pidfile.parent / (pidfile.name + ".lock")
+    with filelock.FileLock(lockpath):
+        try:
+            with pidfile.open("r") as f:
+                pid, create_time = f.read().strip().split(" ", 1)
+        except FileNotFoundError:
+            return True
+
+    # somewhat interesting: we have a pidfile
+    pid = int(pid)
+    create_time = float(create_time)
+
+    try:
+        proc = psutil.Process(pid)
+        # most interesting case: there _is_ a process running at the
+        # recorded PID -- but did it just happen to get that PID, or
+        # is it the very same one that wrote the file?
+        if create_time == proc.create_time():
+            # _not_ stale! another instance is still running against
+            # this configuration
+            return False
+
+    except psutil.NoSuchProcess:
+        pass
+
+    # the file is stale
+    pidfile.unlink()
+    return True
+
+
+from pathlib import Path
+print("can spawn?", can_spawn_tahoe(Path("running.process")))
diff --git a/docs/running.rst b/docs/running.rst
index 406c8200b..2cff59928 100644
--- a/docs/running.rst
+++ b/docs/running.rst
@@ -124,6 +124,35 @@ Tahoe-LAFS.
 
 .. _magic wormhole: https://magic-wormhole.io/
 
+Multiple Instances
+------------------
+
+Running multiple instances against the same configuration directory isn't supported.
+This will lead to undefined behavior and could corrupt the configuration state.
+
+We attempt to avoid this situation with a "pidfile"-style file in the config directory called ``running.process``.
+There may be a parallel file called ``running.process.lock`` in existence.
+
+The ``.lock`` file exists to make sure only one process modifies ``running.process`` at once.
+The lock file is managed by the `lockfile `_ library.
+If you wish to make use of ``running.process`` for any reason you should also lock it and follow the semantics of lockfile.
+
+If ``running.process`` exists it file contains the PID and the creation-time of the process.
+When no such file exists, there is no other process running on this configuration.
+If there is a ``running.process`` file, it may be a leftover file or it may indicate that another process is running against this config.
+To tell the difference, determine if the PID in the file exists currently.
+If it does, check the creation-time of the process versus the one in the file.
+If these match, there is another process currently running.
+Otherwise, the file is stale -- it should be removed before starting Tahoe-LAFS.
+
+Some example Python code to check the above situations:
+
+.. literalinclude:: check_running.py
+
+
+
+
+
 A note about small grids
 ------------------------
 

From 930f4029f370313222b5c0872754f1db16434029 Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 21 Sep 2022 20:07:46 -0600
Subject: [PATCH 52/91] properly write pid, create-time

---
 src/allmydata/util/pid.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py
index f965d72ab..1a833f285 100644
--- a/src/allmydata/util/pid.py
+++ b/src/allmydata/util/pid.py
@@ -94,9 +94,9 @@ def check_pid_process(pidfile):
                 pidfile.remove()
 
             # write our PID + start-time to the pid-file
-            starttime = psutil.Process().create_time()
+            proc = psutil.Process()
             with pidfile.open("w") as f:
-                f.write("{} {}\n".format(pid, starttime).encode("utf8"))
+                f.write("{} {}\n".format(proc.pid, proc.create_time()).encode("utf8"))

From 8474ecf83d46a25a473269f4f7907a5eb6e6e552 Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 21 Sep 2022 20:15:07 -0600
Subject: [PATCH 53/91] typo

---
 src/allmydata/util/pid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py
index 1a833f285..c13dc32f3 100644
--- a/src/allmydata/util/pid.py
+++ b/src/allmydata/util/pid.py
@@ -50,7 +50,7 @@ def parse_pidfile(pidfile):
                 pidfile
             )
         )
-    return pid, startime
+    return pid, starttime
 
 
 def check_pid_process(pidfile):

From fedea9696412d7397f58c11ea04a9148c55f8fd8 Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 21 Sep 2022 20:26:14 -0600
Subject: [PATCH 54/91] less state

---
 src/allmydata/test/cli/test_run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py
index 551164d3c..e84f52096 100644
--- a/src/allmydata/test/cli/test_run.py
+++ b/src/allmydata/test/cli/test_run.py
@@ -168,7 +168,7 @@ class RunTests(SyncTestCase):
         config['basedir'] = basedir.path
         config.twistd_args = []
 
-        from twisted.internet import reactor
+        reactor = MemoryReactor()
 
         runs = []
         result_code = run(reactor, config, runApp=runs.append)

From 8d8b0e6f01cdd8ab1f64593eda772a8b7db6c3d6 Mon Sep 17 00:00:00 2001
From: meejah
Date: Wed, 21 Sep 2022 20:40:25 -0600
Subject: [PATCH 55/91] cleanup

---
 src/allmydata/scripts/tahoe_run.py | 6 +-----
 src/allmydata/util/pid.py          | 1 -
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/allmydata/scripts/tahoe_run.py 
b/src/allmydata/scripts/tahoe_run.py index 4d17492d4..e22e8c307 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -62,13 +62,9 @@ def get_pid_from_pidfile(pidfile): inaccessible, ``-1`` if PID file invalid. """ try: - with open(pidfile, "r") as f: - data = f.read().strip() + pid, _ = parse_pidfile(pidfile) except EnvironmentError: return None - - try: - pid, _ = parse_pidfile(pidfile) except InvalidPidFile: return -1 diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index c13dc32f3..f12c201d1 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,4 +1,3 @@ -import os import psutil # the docs are a little misleading, but this is either WindowsFileLock From 4f5a1ac37222e51974bcb0a28b5ec9e0e6c0e944 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 23:36:23 -0600 Subject: [PATCH 56/91] naming? --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 5a8311649..962dffd1a 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -638,7 +638,7 @@ class PidFileLocking(SyncTestCase): pidfile = FilePath("foo") lockfile = _pidfile_to_lockpath(pidfile) - with open("code.py", "w") as f: + with open("other_lock.py", "w") as f: f.write( "\n".join([ "import filelock, time", @@ -648,7 +648,7 @@ class PidFileLocking(SyncTestCase): ]) ) proc = Popen( - [sys.executable, "code.py"], + [sys.executable, "other_lock.py"], stdout=PIPE, stderr=PIPE, start_new_session=True, From 8ebe331c358789f3af8dd9d64607ee63404077d7 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 22 Sep 2022 00:11:20 -0600 Subject: [PATCH 57/91] maybe a newline helps --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 962dffd1a..3d8180c7a 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -643,7 +643,7 @@ class PidFileLocking(SyncTestCase): "\n".join([ "import filelock, time", "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), - " print('.', flush=True)", + " print('.\n', flush=True)", " time.sleep(5)", ]) ) @@ -657,7 +657,7 @@ class PidFileLocking(SyncTestCase): # for sure (from the "." it prints) self.assertThat( proc.stdout.read(1), - Equals(b".") + Equals(b".\n") ) # we should not be able to acuire this corresponding lock as well From a182a2507987213b519bcb22c6f49eec0004830c Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 22 Sep 2022 21:43:20 -0600 Subject: [PATCH 58/91] backslashes --- src/allmydata/test/test_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 3d8180c7a..e6b7b746f 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -641,9 +641,10 @@ class PidFileLocking(SyncTestCase): with open("other_lock.py", "w") as f: f.write( "\n".join([ - "import filelock, time", + "import filelock, time, sys", "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), - " print('.\n', flush=True)", + " sys.stdout.write('.\\n')", + " sys.stdout.flush()", " time.sleep(5)", ]) ) @@ -656,7 +657,7 @@ class PidFileLocking(SyncTestCase): # make sure our subprocess has had time to acquire the lock # for sure (from the "." 
it prints) self.assertThat( - proc.stdout.read(1), + proc.stdout.read(2), Equals(b".\n") ) From 62b92585c62c44694e5db7a8769a772b2d712a07 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 22 Sep 2022 23:57:19 -0600 Subject: [PATCH 59/91] simplify --- src/allmydata/test/test_runner.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index e6b7b746f..5431fbaa9 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -656,12 +656,9 @@ class PidFileLocking(SyncTestCase): ) # make sure our subprocess has had time to acquire the lock # for sure (from the "." it prints) - self.assertThat( - proc.stdout.read(2), - Equals(b".\n") - ) + proc.stdout.read(2), - # we should not be able to acuire this corresponding lock as well + # acquiring the same lock should fail; it is locked by the subprocess with self.assertRaises(ProcessInTheWay): check_pid_process(pidfile) proc.terminate() From 7fdeb8797e8164f1b0fd15ddda4108417545e00d Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 23 Sep 2022 00:26:39 -0600 Subject: [PATCH 60/91] hardcoding bad --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 5431fbaa9..f414ed8b3 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -635,7 +635,7 @@ class PidFileLocking(SyncTestCase): """ # this can't just be "our" process because the locking library # allows the same process to acquire a lock multiple times. - pidfile = FilePath("foo") + pidfile = FilePath(self.mktemp()) lockfile = _pidfile_to_lockpath(pidfile) with open("other_lock.py", "w") as f: From f2cfd96b5e3af0fe82a7bf1ef770cad08d3969cd Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 23 Sep 2022 01:04:58 -0600 Subject: [PATCH 61/91] typo, longer timeout --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index f414ed8b3..b80891642 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -645,7 +645,7 @@ class PidFileLocking(SyncTestCase): "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), " sys.stdout.write('.\\n')", " sys.stdout.flush()", - " time.sleep(5)", + " time.sleep(10)", ]) ) proc = Popen( @@ -656,7 +656,7 @@ class PidFileLocking(SyncTestCase): ) # make sure our subprocess has had time to acquire the lock # for sure (from the "." it prints) - proc.stdout.read(2), + proc.stdout.read(2) # acquiring the same lock should fail; it is locked by the subprocess with self.assertRaises(ProcessInTheWay): From 8991509f8c82642d75e3070ad7ae02bfe061977d Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 25 Sep 2022 00:16:40 -0600 Subject: [PATCH 62/91] blackslashes.... 
--- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index b80891642..f8211ec02 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -642,7 +642,7 @@ class PidFileLocking(SyncTestCase): f.write( "\n".join([ "import filelock, time, sys", - "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), + "with filelock.FileLock(r'{}', timeout=1):".format(lockfile.path), " sys.stdout.write('.\\n')", " sys.stdout.flush()", " time.sleep(10)", ]) ) From d42c00ae9293dd18c9f1efd22e86984c4725f222 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 25 Sep 2022 00:46:30 -0600 Subject: [PATCH 63/91] do all checks with lock --- docs/check_running.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/check_running.py b/docs/check_running.py index ecc55da34..55aae0015 100644 --- a/docs/check_running.py +++ b/docs/check_running.py @@ -21,22 +21,22 @@ def can_spawn_tahoe(pidfile): except FileNotFoundError: return True - # somewhat interesting: we have a pidfile - pid = int(pid) - create_time = float(create_time) + # somewhat interesting: we have a pidfile + pid = int(pid) + create_time = float(create_time) - try: - proc = psutil.Process(pid) - # most interesting case: there _is_ a process running at the - # recorded PID -- but did it just happen to get that PID, or - # is it the very same one that wrote the file? - if create_time == proc.create_time(): - # _not_ stale! another intance is still running against - # this configuration - return False + try: + proc = psutil.Process(pid) + # most interesting case: there _is_ a process running at the + # recorded PID -- but did it just happen to get that PID, or + # is it the very same one that wrote the file? + if create_time == proc.create_time(): + # _not_ stale! another instance is still running against + # this configuration + return False - except psutil.NoSuchProcess: - pass + except psutil.NoSuchProcess: + pass # the file is stale pidfile.unlink() From d16d233872df95b8e3876e3aa32e0fdb30cc9f98 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 25 Sep 2022 00:47:58 -0600 Subject: [PATCH 64/91] wording --- docs/running.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running.rst b/docs/running.rst index 2cff59928..b487f4ae3 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -128,7 +128,7 @@ Multiple Instances ------------------ Running multiple instances against the same configuration directory isn't supported. -This will lead to undefined behavior and could corrupt the configuration state. +This will lead to undefined behavior and could corrupt the configuration or state. We attempt to avoid this situation with a "pidfile"-style file in the config directory called ``running.process``. There may be a parallel file called ``running.process.lock`` in existence. From 4919b6d9066a10028e6548800a589929c3c094d9 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 09:34:36 -0600 Subject: [PATCH 65/91] typo Co-authored-by: Jean-Paul Calderone --- docs/running.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running.rst b/docs/running.rst index b487f4ae3..29df15a3c 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -137,7 +137,7 @@ The ``.lock`` file exists to make sure only one process modifies ``running.process``. The lock file is managed by the `lockfile `_ library.
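As an illustration, a third-party tool that wants to read ``running.process`` safely could take the same lock first.
This is only a sketch: it assumes the ``filelock`` package, a config directory of ``~/.tahoe``, and whitespace-separated fields, so verify all three against your installation::

    import os.path
    import filelock

    config = os.path.expanduser("~/.tahoe")
    # holding the lock excludes a starting or stopping node while we read
    with filelock.FileLock(os.path.join(config, "running.process.lock"), timeout=2):
        with open(os.path.join(config, "running.process")) as f:
            pid, create_time = f.read().strip().split()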
If you wish to make use of ``running.process`` for any reason you should also lock it and follow the semantics of lockfile. -If ``running.process` exists it file contains the PID and the creation-time of the process. +If ``running.process`` exists then it contains the PID and the creation-time of the process. When no such file exists, there is no other process running on this configuration. If there is a ``running.process`` file, it may be a leftover file or it may indicate that another process is running against this config. To tell the difference, determine if the PID in the file exists currently. From 7aae2f78575541799002645e85a3aeab9f8706c2 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 09:34:54 -0600 Subject: [PATCH 66/91] Clarify Co-authored-by: Jean-Paul Calderone --- docs/running.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running.rst b/docs/running.rst index 29df15a3c..263448735 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -142,7 +142,7 @@ When no such file exists, there is no other process running on this configuratio If there is a ``running.process`` file, it may be a leftover file or it may indicate that another process is running against this config. To tell the difference, determine if the PID in the file exists currently. If it does, check the creation-time of the process versus the one in the file. -If these match, there is another process currently running. +If these match, there is another process currently running and using this config. Otherwise, the file is stale -- it should be removed before starting Tahoe-LAFS. Some example Python code to check the above situations: From a7398e13f7c82707738c3862cb085d7e2a055bb2 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 09:35:17 -0600 Subject: [PATCH 67/91] Update docs/check_running.py Co-authored-by: Jean-Paul Calderone --- docs/check_running.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/check_running.py b/docs/check_running.py index 55aae0015..2705f1721 100644 --- a/docs/check_running.py +++ b/docs/check_running.py @@ -38,9 +38,9 @@ def can_spawn_tahoe(pidfile): except psutil.NoSuchProcess: pass - # the file is stale - pidfile.unlink() - return True + # the file is stale + pidfile.unlink() + return True from pathlib import Path From ca522a5293c8f2e38e9b8d2071fc3865907f4177 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 10:07:44 -0600 Subject: [PATCH 68/91] sys.argv not inline --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index f8211ec02..00c87ce08 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -642,14 +642,14 @@ class PidFileLocking(SyncTestCase): f.write( "\n".join([ "import filelock, time, sys", - "with filelock.FileLock(r'{}', timeout=1):".format(lockfile.path), + "with filelock.FileLock(sys.argv[1], timeout=1):", " sys.stdout.write('.\\n')", " sys.stdout.flush()", " time.sleep(10)", ]) ) proc = Popen( - [sys.executable, "other_lock.py"], + [sys.executable, "other_lock.py", lockfile.path], stdout=PIPE, stderr=PIPE, start_new_session=True, From bef71978b6e7181598fc30f1b94d56b0b7e6a7c5 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 10:08:13 -0600 Subject: [PATCH 69/91] don't need start_new_session --- src/allmydata/test/test_runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py 
b/src/allmydata/test/test_runner.py index 00c87ce08..74e3f803e 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -652,7 +652,6 @@ class PidFileLocking(SyncTestCase): [sys.executable, "other_lock.py", lockfile.path], stdout=PIPE, stderr=PIPE, - start_new_session=True, ) # make sure our subprocess has had time to acquire the lock # for sure (from the "." it prints) From 2a3b110d53146d86709a8e161d90e65d6a07f0fe Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 30 Sep 2022 16:48:23 -0600 Subject: [PATCH 70/91] simple build automation --- Makefile | 44 ++++++++++++++++++++++++++++++++++++++++++++ setup.py | 4 ++++ 2 files changed, 48 insertions(+) diff --git a/Makefile b/Makefile index 5cbd863a3..6dd2b743b 100644 --- a/Makefile +++ b/Makefile @@ -224,3 +224,47 @@ src/allmydata/_version.py: .tox/create-venvs.log: tox.ini setup.py tox --notest -p all | tee -a "$(@)" + + +# Make a new release. TODO: +# - clean checkout necessary? garbage in tarball? +release: + @echo "Is checkout clean?" + git diff-files --quiet + git diff-index --quiet --cached HEAD -- + + @echo "Install required build software" + python3 -m pip install --editable .[build] + + @echo "Test README" + python3 setup.py check -r -s + + @echo "Update NEWS" + python3 -m towncrier build --yes --version `python3 misc/build_helpers/update-version.py --no-tag` + git add -u + git commit -m "update NEWS for release" + + @echo "Bump version and create tag" + python3 misc/build_helpers/update-version.py + + @echo "Build and sign wheel" + python3 setup.py bdist_wheel + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl + ls dist/*`git describe --abbrev=0`* + + @echo "Build and sign source-dist" + python3 setup.py sdist + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz + ls dist/*`git describe --abbrev=0`* + +release-test: + gpg --verify dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc + gpg --verify dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc + virtualenv testmf_venv + testmf_venv/bin/pip install dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl + testmf_venv/bin/tahoe-lafs --version +# ... + rm -rf testmf_venv + +release-upload: + twine upload dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc diff --git a/setup.py b/setup.py index d99831347..ffe23a7b5 100644 --- a/setup.py +++ b/setup.py @@ -380,6 +380,10 @@ setup(name="tahoe-lafs", # also set in __init__.py # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2392 for some # discussion. ':sys_platform=="win32"': ["pywin32 != 226"], + "build": [ + "dulwich", + "gpg", + ], "test": [ "flake8", # Pin a specific pyflakes so we don't have different folks From 4b708d87bd0bd6d07517a113c065e0f0329b8d34 Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 30 Sep 2022 16:53:48 -0600 Subject: [PATCH 71/91] wip --- Makefile | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 6dd2b743b..66f1819ad 100644 --- a/Makefile +++ b/Makefile @@ -239,6 +239,9 @@ release: @echo "Test README" python3 setup.py check -r -s +# XXX make branch, based on a ticket (provided how?) 
+# XXX or, specify that "make release" must run on such a branch "XXXX.tahoe-release" + @echo "Update NEWS" python3 -m towncrier build --yes --version `python3 misc/build_helpers/update-version.py --no-tag` git add -u @@ -249,22 +252,22 @@ release: @echo "Build and sign wheel" python3 setup.py bdist_wheel - gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl - ls dist/*`git describe --abbrev=0`* + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl + ls dist/*`git describe | cut -b 12-`* @echo "Build and sign source-dist" python3 setup.py sdist - gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz - ls dist/*`git describe --abbrev=0`* + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz + ls dist/*`git describe | cut -b 12-`* release-test: - gpg --verify dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc - gpg --verify dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc + gpg --verify dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc + gpg --verify dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc virtualenv testmf_venv - testmf_venv/bin/pip install dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl + testmf_venv/bin/pip install dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl testmf_venv/bin/tahoe-lafs --version # ... rm -rf testmf_venv release-upload: - twine upload dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc + twine upload dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc From 4137d6ebb7b73de4782e0d332485684cf3585376 Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 30 Sep 2022 17:20:19 -0600 Subject: [PATCH 72/91] proper smoke-test --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 66f1819ad..5ad676e86 100644 --- a/Makefile +++ b/Makefile @@ -260,13 +260,13 @@ release: gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz ls dist/*`git describe | cut -b 12-`* +# basically just a bare-minimum smoke-test that it installs and runs release-test: gpg --verify dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc gpg --verify dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc virtualenv testmf_venv testmf_venv/bin/pip install dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl - testmf_venv/bin/tahoe-lafs --version -# ... 
+ testmf_venv/bin/tahoe --version rm -rf testmf_venv release-upload: From 923f456d6e9f53ecb6db67c73a999f72027b2655 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 1 Oct 2022 14:47:19 -0600 Subject: [PATCH 73/91] all upload steps --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 5ad676e86..b68b788ca 100644 --- a/Makefile +++ b/Makefile @@ -270,4 +270,6 @@ release-test: rm -rf testmf_venv release-upload: + scp dist/*`git describe | cut -b 12-`* meejah@tahoe-lafs.org:/home/source/downloads + git push origin_push tahoe-lafs-`git describe | cut -b 12-` twine upload dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc From c711b5b0a9c825d6e1bfb3a30437da380a63b422 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 13:33:05 -0600 Subject: [PATCH 74/91] clean docs --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index b68b788ca..8b34fd0e1 100644 --- a/Makefile +++ b/Makefile @@ -233,6 +233,9 @@ release: git diff-files --quiet git diff-index --quiet --cached HEAD -- + @echo "Clean docs build area" + rm -rf docs/_build/ + @echo "Install required build software" python3 -m pip install --editable .[build] From 3d3dc187646f0ba2203f73eecf2c927147400884 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 14:34:42 -0600 Subject: [PATCH 75/91] better instructions --- Makefile | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8b34fd0e1..c501ba3c5 100644 --- a/Makefile +++ b/Makefile @@ -226,8 +226,16 @@ src/allmydata/_version.py: tox --notest -p all | tee -a "$(@)" -# Make a new release. TODO: -# - clean checkout necessary? garbage in tarball? +# to make a new release: +# - create a ticket for the release in Trac +# - ensure local copy is up-to-date +# - create a branch like "XXXX.release" from up-to-date master +# - in the branch, run "make release" +# - run "make release-test" +# - perform any other sanity-checks on the release +# - run "make release-upload" +# Note that several commands below hard-code "meejah"; if you are +# someone else please adjust them. release: @echo "Is checkout clean?" git diff-files --quiet From a22be070b8ff9b4e05af9f28e61d209f64fcdeb2 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 14:51:29 -0600 Subject: [PATCH 76/91] version-updating script --- misc/build_helpers/update-version.py | 96 ++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 misc/build_helpers/update-version.py diff --git a/misc/build_helpers/update-version.py b/misc/build_helpers/update-version.py new file mode 100644 index 000000000..38baf7c7c --- /dev/null +++ b/misc/build_helpers/update-version.py @@ -0,0 +1,96 @@ +# +# this updates the (tagged) version of the software +# +# Any "options" are hard-coded in here (e.g. 
the GnuPG key to use) # author = "meejah " import sys import time from datetime import datetime from packaging.version import Version from dulwich.repo import Repo from dulwich.porcelain import ( tag_list, tag_create, status, ) from twisted.internet.task import ( react, ) from twisted.internet.defer import ( ensureDeferred, ) def existing_tags(git): versions = sorted( Version(v.decode("utf8").lstrip("tahoe-lafs-")) for v in tag_list(git) if v.startswith(b"tahoe-lafs-") ) return versions def create_new_version(git): versions = existing_tags(git) biggest = versions[-1] return Version( "{}.{}.{}".format( biggest.major, biggest.minor + 1, 0, ) ) async def main(reactor): git = Repo(".") st = status(git) if any(st.staged.values()) or st.unstaged: print("unclean checkout; aborting") raise SystemExit(1) v = create_new_version(git) if "--no-tag" in sys.argv: print(v) return print("Existing tags: {}".format("\n".join(str(x) for x in existing_tags(git)))) print("New tag will be {}".format(v)) # the "tag time" is seconds from the epoch .. we quantize these to # the start of the day in question, in UTC. now = datetime.now() s = now.utctimetuple() ts = int( time.mktime( time.struct_time((s.tm_year, s.tm_mon, s.tm_mday, 0, 0, 0, 0, s.tm_yday, 0)) ) ) tag_create( repo=git, tag="tahoe-lafs-{}".format(str(v)).encode("utf8"), author=author.encode("utf8"), message="Release {}".format(v).encode("utf8"), annotated=True, objectish=b"HEAD", sign=author.encode("utf8"), tag_time=ts, tag_timezone=0, ) print("Tag created locally, it is not pushed") print("To push it run something like:") print(" git push origin tahoe-lafs-{}".format(v)) if __name__ == "__main__": react(lambda r: ensureDeferred(main(r))) From 1af48672e39ab6430583dbd38fcfe4fa61821d09 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 14:53:03 -0600 Subject: [PATCH 77/91] correct notes --- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index c501ba3c5..c02184a36 100644 --- a/Makefile +++ b/Makefile @@ -250,14 +250,13 @@ release: @echo "Test README" python3 setup.py check -r -s -# XXX make branch, based on a ticket (provided how?) -# XXX or, specify that "make release" must run on such a branch "XXXX.tahoe-release" - @echo "Update NEWS" python3 -m towncrier build --yes --version `python3 misc/build_helpers/update-version.py --no-tag` git add -u git commit -m "update NEWS for release" +# note that this always bumps the "middle" number, e.g.
from 1.17.1 -> 1.18.0 +# and produces a tag into the Git repository @echo "Bump version and create tag" python3 misc/build_helpers/update-version.py From 6bb46a832bfde84714d35625a256265e93688684 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 18:52:57 -0600 Subject: [PATCH 78/91] flake8 --- misc/build_helpers/update-version.py | 1 - 1 file changed, 1 deletion(-) diff --git a/misc/build_helpers/update-version.py b/misc/build_helpers/update-version.py index 38baf7c7c..75b22edae 100644 --- a/misc/build_helpers/update-version.py +++ b/misc/build_helpers/update-version.py @@ -9,7 +9,6 @@ author = "meejah " import sys import time -import itertools from datetime import datetime from packaging.version import Version From 402d80710caa5aa50f0a5bef79ae979a75dc3594 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 19:03:10 -0600 Subject: [PATCH 79/91] news --- newsfragments/3846.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3846.feature diff --git a/newsfragments/3846.feature b/newsfragments/3846.feature new file mode 100644 index 000000000..fd321eaf0 --- /dev/null +++ b/newsfragments/3846.feature @@ -0,0 +1 @@ +"make" based release automation From e8e43d2100f1e8dfc6bd421dffd3824e57b903d0 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 19:05:16 -0600 Subject: [PATCH 80/91] update NEWS for release --- NEWS.rst | 41 +++++++++++++++++++++++++++++++++++++ newsfragments/3327.minor | 0 newsfragments/3526.minor | 1 - newsfragments/3697.minor | 1 - newsfragments/3709.minor | 0 newsfragments/3786.minor | 1 - newsfragments/3788.minor | 0 newsfragments/3802.minor | 0 newsfragments/3816.minor | 0 newsfragments/3828.feature | 8 -------- newsfragments/3846.feature | 1 - newsfragments/3855.minor | 0 newsfragments/3858.minor | 0 newsfragments/3859.minor | 0 newsfragments/3860.minor | 0 newsfragments/3865.incompat | 1 - newsfragments/3867.minor | 0 newsfragments/3868.minor | 0 newsfragments/3871.minor | 0 newsfragments/3872.minor | 0 newsfragments/3873.incompat | 1 - newsfragments/3875.minor | 0 newsfragments/3876.minor | 0 newsfragments/3877.minor | 0 newsfragments/3879.incompat | 1 - newsfragments/3881.minor | 0 newsfragments/3882.minor | 0 newsfragments/3883.minor | 0 newsfragments/3889.minor | 0 newsfragments/3890.minor | 0 newsfragments/3891.minor | 0 newsfragments/3893.minor | 0 newsfragments/3895.minor | 0 newsfragments/3896.minor | 0 newsfragments/3898.minor | 0 newsfragments/3900.minor | 0 newsfragments/3909.minor | 0 newsfragments/3913.minor | 0 newsfragments/3915.minor | 0 newsfragments/3916.minor | 0 newsfragments/3926.incompat | 10 --------- 41 files changed, 41 insertions(+), 25 deletions(-) delete mode 100644 newsfragments/3327.minor delete mode 100644 newsfragments/3526.minor delete mode 100644 newsfragments/3697.minor delete mode 100644 newsfragments/3709.minor delete mode 100644 newsfragments/3786.minor delete mode 100644 newsfragments/3788.minor delete mode 100644 newsfragments/3802.minor delete mode 100644 newsfragments/3816.minor delete mode 100644 newsfragments/3828.feature delete mode 100644 newsfragments/3846.feature delete mode 100644 newsfragments/3855.minor delete mode 100644 newsfragments/3858.minor delete mode 100644 newsfragments/3859.minor delete mode 100644 newsfragments/3860.minor delete mode 100644 newsfragments/3865.incompat delete mode 100644 newsfragments/3867.minor delete mode 100644 newsfragments/3868.minor delete mode 100644 newsfragments/3871.minor delete mode 100644 newsfragments/3872.minor delete mode 100644 
newsfragments/3873.incompat delete mode 100644 newsfragments/3875.minor delete mode 100644 newsfragments/3876.minor delete mode 100644 newsfragments/3877.minor delete mode 100644 newsfragments/3879.incompat delete mode 100644 newsfragments/3881.minor delete mode 100644 newsfragments/3882.minor delete mode 100644 newsfragments/3883.minor delete mode 100644 newsfragments/3889.minor delete mode 100644 newsfragments/3890.minor delete mode 100644 newsfragments/3891.minor delete mode 100644 newsfragments/3893.minor delete mode 100644 newsfragments/3895.minor delete mode 100644 newsfragments/3896.minor delete mode 100644 newsfragments/3898.minor delete mode 100644 newsfragments/3900.minor delete mode 100644 newsfragments/3909.minor delete mode 100644 newsfragments/3913.minor delete mode 100644 newsfragments/3915.minor delete mode 100644 newsfragments/3916.minor delete mode 100644 newsfragments/3926.incompat diff --git a/NEWS.rst b/NEWS.rst index 0f9194cc4..7b1fadb8a 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -5,6 +5,47 @@ User-Visible Changes in Tahoe-LAFS ================================== .. towncrier start line +Release 1.18.0 (2022-10-02) +''''''''''''''''''''''''''' + +Backwards Incompatible Changes +------------------------------ + +- Python 3.6 is no longer supported, as it has reached end-of-life and is no longer receiving security updates. (`#3865 `_) +- Python 3.7 or later is now required; Python 2 is no longer supported. (`#3873 `_) +- Share corruption reports stored on disk are now always encoded in UTF-8. (`#3879 `_) +- Record both the PID and the process creation-time: + + a new kind of pidfile in `running.process` records both + the PID and the creation-time of the process. This facilitates + automatic discovery of a "stale" pidfile that points to a + currently-running process. If the recorded creation-time matches + the creation-time of the running process, then it is a still-running + `tahoe run` process. Otherwise, the file is stale. + + The `twistd.pid` file is no longer present. (`#3926 `_) + + +Features +-------- + +- The implementation of SDMF and MDMF (mutables) now requires RSA keys to be exactly 2048 bits, aligning them with the specification. + + Some code existed to allow tests to shorten this and it's + conceptually possible a modified client produced mutables + with different key-sizes. However, the spec says that they + must be 2048 bits. If you happen to have a capability with + a key-size different from 2048 you may use 1.17.1 or earlier + to read the content. (`#3828 `_) +- "make" based release automation (`#3846 `_) + + +Misc/Other +---------- + +- `#3327 `_, `#3526 `_, `#3697 `_, `#3709 `_, `#3786 `_, `#3788 `_, `#3802 `_, `#3816 `_, `#3855 `_, `#3858 `_, `#3859 `_, `#3860 `_, `#3867 `_, `#3868 `_, `#3871 `_, `#3872 `_, `#3875 `_, `#3876 `_, `#3877 `_, `#3881 `_, `#3882 `_, `#3883 `_, `#3889 `_, `#3890 `_, `#3891 `_, `#3893 `_, `#3895 `_, `#3896 `_, `#3898 `_, `#3900 `_, `#3909 `_, `#3913 `_, `#3915 `_, `#3916 `_ + + Release 1.17.1 (2022-01-07) ''''''''''''''''''''''''''' diff --git a/newsfragments/3327.minor b/newsfragments/3327.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3526.minor b/newsfragments/3526.minor deleted file mode 100644 index 8b1378917..000000000 --- a/newsfragments/3526.minor +++ /dev/null @@ -1 +0,0 @@ - diff --git a/newsfragments/3697.minor b/newsfragments/3697.minor deleted file mode 100644 index 0977d8a6f..000000000 --- a/newsfragments/3697.minor +++ /dev/null @@ -1 +0,0 @@ -Added support for Python 3.10. 
Added support for PyPy3 (3.7 and 3.8, on Linux only). \ No newline at end of file diff --git a/newsfragments/3709.minor b/newsfragments/3709.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3786.minor b/newsfragments/3786.minor deleted file mode 100644 index ecd1a2c4e..000000000 --- a/newsfragments/3786.minor +++ /dev/null @@ -1 +0,0 @@ -Added re-structured text documentation for the OpenMetrics format statistics endpoint. diff --git a/newsfragments/3788.minor b/newsfragments/3788.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3802.minor b/newsfragments/3802.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3816.minor b/newsfragments/3816.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3828.feature b/newsfragments/3828.feature deleted file mode 100644 index d396439b0..000000000 --- a/newsfragments/3828.feature +++ /dev/null @@ -1,8 +0,0 @@ -The implementation of SDMF and MDMF (mutables) now requires RSA keys to be exactly 2048 bits, aligning them with the specification. - -Some code existed to allow tests to shorten this and it's -conceptually possible a modified client produced mutables -with different key-sizes. However, the spec says that they -must be 2048 bits. If you happen to have a capability with -a key-size different from 2048 you may use 1.17.1 or earlier -to read the content. diff --git a/newsfragments/3846.feature b/newsfragments/3846.feature deleted file mode 100644 index fd321eaf0..000000000 --- a/newsfragments/3846.feature +++ /dev/null @@ -1 +0,0 @@ -"make" based release automation diff --git a/newsfragments/3855.minor b/newsfragments/3855.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3858.minor b/newsfragments/3858.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3859.minor b/newsfragments/3859.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3860.minor b/newsfragments/3860.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3865.incompat b/newsfragments/3865.incompat deleted file mode 100644 index 59381b269..000000000 --- a/newsfragments/3865.incompat +++ /dev/null @@ -1 +0,0 @@ -Python 3.6 is no longer supported, as it has reached end-of-life and is no longer receiving security updates. \ No newline at end of file diff --git a/newsfragments/3867.minor b/newsfragments/3867.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3868.minor b/newsfragments/3868.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3871.minor b/newsfragments/3871.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3872.minor b/newsfragments/3872.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3873.incompat b/newsfragments/3873.incompat deleted file mode 100644 index da8a5fb0e..000000000 --- a/newsfragments/3873.incompat +++ /dev/null @@ -1 +0,0 @@ -Python 3.7 or later is now required; Python 2 is no longer supported. 
\ No newline at end of file diff --git a/newsfragments/3875.minor b/newsfragments/3875.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3876.minor b/newsfragments/3876.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3877.minor b/newsfragments/3877.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3879.incompat b/newsfragments/3879.incompat deleted file mode 100644 index ca3f24f94..000000000 --- a/newsfragments/3879.incompat +++ /dev/null @@ -1 +0,0 @@ -Share corruption reports stored on disk are now always encoded in UTF-8. \ No newline at end of file diff --git a/newsfragments/3881.minor b/newsfragments/3881.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3882.minor b/newsfragments/3882.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3883.minor b/newsfragments/3883.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3889.minor b/newsfragments/3889.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3890.minor b/newsfragments/3890.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3891.minor b/newsfragments/3891.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3893.minor b/newsfragments/3893.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3895.minor b/newsfragments/3895.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3896.minor b/newsfragments/3896.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3898.minor b/newsfragments/3898.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3900.minor b/newsfragments/3900.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3909.minor b/newsfragments/3909.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3913.minor b/newsfragments/3913.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3915.minor b/newsfragments/3915.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3916.minor b/newsfragments/3916.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3926.incompat b/newsfragments/3926.incompat deleted file mode 100644 index 674ad289c..000000000 --- a/newsfragments/3926.incompat +++ /dev/null @@ -1,10 +0,0 @@ -Record both the PID and the process creation-time: - -a new kind of pidfile in `running.process` records both -the PID and the creation-time of the process. This facilitates -automatic discovery of a "stale" pidfile that points to a -currently-running process. If the recorded creation-time matches -the creation-time of the running process, then it is a still-running -`tahoe run` process. Otherwise, the file is stale. - -The `twistd.pid` file is no longer present. \ No newline at end of file From a53420c1931b4ec9c6a40f5105a44d7d4ac0f846 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 10:49:01 -0400 Subject: [PATCH 81/91] Use known working version of i2pd. 
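Pinning an explicit release tag keeps the integration run
reproducible; the floating tag can silently pull a router these tests
were never written against. If the pin needs regular bumps, it could
be hoisted into a constant -- a sketch only, since I2PD_IMAGE does
not currently exist in integration/test_i2p.py:

    I2PD_IMAGE = "purplei2p/i2pd:release-2.43.0"
    ...
    "docker", "run", "-p", "7656:7656", I2PD_IMAGE,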
--- integration/test_i2p.py | 2 +- newsfragments/3928.minor | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 newsfragments/3928.minor diff --git a/integration/test_i2p.py b/integration/test_i2p.py index f0b06f1e2..97abb40a5 100644 --- a/integration/test_i2p.py +++ b/integration/test_i2p.py @@ -55,7 +55,7 @@ def i2p_network(reactor, temp_dir, request): proto, which("docker"), ( - "docker", "run", "-p", "7656:7656", "purplei2p/i2pd", + "docker", "run", "-p", "7656:7656", "purplei2p/i2pd:release-2.43.0", # Bad URL for reseeds, so it can't talk to other routers. "--reseed.urls", "http://localhost:1/", ), diff --git a/newsfragments/3928.minor b/newsfragments/3928.minor new file mode 100644 index 000000000..e69de29bb From ec15d58e10356016130cb7eaf97681584540a611 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 10:49:08 -0400 Subject: [PATCH 82/91] Actually clean up the container. --- integration/test_i2p.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/test_i2p.py b/integration/test_i2p.py index 97abb40a5..15f9d73cf 100644 --- a/integration/test_i2p.py +++ b/integration/test_i2p.py @@ -63,7 +63,7 @@ def i2p_network(reactor, temp_dir, request): def cleanup(): try: - proto.transport.signalProcess("KILL") + proto.transport.signalProcess("INT") util.block_with_timeout(proto.exited, reactor) except ProcessExitedAlready: pass From b86f99f0ebaa5d0239d18498f59f412967bf0b27 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:00:34 -0400 Subject: [PATCH 83/91] Make this more accurate given changes in spec. --- docs/specifications/url.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 39a830e5a..fe756208b 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -87,11 +87,13 @@ These differences are separated into distinct versions. Version 0 --------- -A Foolscap fURL is considered the canonical definition of a version 0 NURL. +In theory, a Foolscap fURL with a single netloc is considered the canonical definition of a version 0 NURL. Notably, the hash component is defined as the base32-encoded SHA1 hash of the DER form of an x509v3 certificate. A version 0 NURL is identified by the absence of the ``v=1`` fragment. +In practice, real world fURLs may have more than one netloc, so lack of version fragment will likely just involve dispatching the fURL to a different parser. + Examples ~~~~~~~~ @@ -119,7 +121,7 @@ The hash component of a version 1 NURL differs in three ways from the prior vers *all* certificate fields should be considered within the context of the relationship identified by the SPKI hash. 3. The hash is encoded using urlsafe-base64 (without padding) instead of base32. - This provides a more compact representation and minimizes the usability impacts of switching from a 160 bit hash to a 224 bit hash. + This provides a more compact representation and minimizes the usability impacts of switching from a 160 bit hash to a 256 bit hash. A version 1 NURL is identified by the presence of the ``v=1`` fragment. Though the length of the hash string (38 bytes) could also be used to differentiate it from a version 0 NURL, From b0fb72e379bcbfaabb2ae37452d9a68dd481cbea Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:02:48 -0400 Subject: [PATCH 84/91] Link to design issue. 
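For anyone auditing the NURL handling these comments point at: the
version 1 hash from the previous commit's spec change is easy to
reproduce. A sketch only, assuming the "cryptography" package rather
than any project helper:

    from base64 import urlsafe_b64encode
    from hashlib import sha256
    from cryptography.x509 import load_pem_x509_certificate
    from cryptography.hazmat.primitives.serialization import (
        Encoding, PublicFormat,
    )

    def spki_hash(cert_pem: bytes) -> bytes:
        # unpadded urlsafe-base64 of the SHA-256 digest of the
        # certificate's SubjectPublicKeyInfo bytes
        cert = load_pem_x509_certificate(cert_pem)
        spki = cert.public_key().public_bytes(
            Encoding.DER, PublicFormat.SubjectPublicKeyInfo
        )
        return urlsafe_b64encode(sha256(spki).digest()).rstrip(b"=")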
--- src/allmydata/client.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 9938ec076..ac8b03e2f 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -591,6 +591,10 @@ def anonymous_storage_enabled(config): @implementer(IStatsProducer) class _Client(node.Node, pollmixin.PollMixin): + """ + This class should be refactored; see + https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931 + """ STOREDIR = 'storage' NODETYPE = "client" @@ -661,7 +665,9 @@ class _Client(node.Node, pollmixin.PollMixin): # TODO this may be the wrong location for now? but as temporary measure # it allows us to get NURLs for testing in test_istorageserver.py. This # will eventually get fixed one way or another in - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901 + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901. See also + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931 for the bigger + # picture issue. self.storage_nurls = set() def init_stats_provider(self): From d753bb58da880a00724bd0a9c592803ee7983fca Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:05:56 -0400 Subject: [PATCH 85/91] Better type for storage_nurls. --- src/allmydata/client.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index ac8b03e2f..a31d05b9c 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -1,17 +1,9 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 - # Don't use future str to prevent leaking future's newbytes into foolscap, which they break. - from past.builtins import unicode as str +from __future__ import annotations +from typing import Optional import os, stat, time, weakref from base64 import urlsafe_b64encode from functools import partial @@ -668,7 +660,7 @@ class _Client(node.Node, pollmixin.PollMixin): # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901. See also # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931 for the bigger # picture issue. - self.storage_nurls = set() + self.storage_nurls : Optional[set] = None def init_stats_provider(self): self.stats_provider = StatsProvider(self) @@ -831,8 +823,8 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - self.storage_nurls.update( - self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) + self.storage_nurls = self.tub.negotiationClass.add_storage_server( + ss, swissnum.encode("ascii") ) announcement["anonymous-storage-FURL"] = furl From d918135a0d016f579073dac7236a3aadfab76bbf Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:10:36 -0400 Subject: [PATCH 86/91] Use parser instead of ad-hoc parser. 
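rpartition("/") happened to work because the swissnum is the last
path segment of a fURL, but it silently returns nonsense for anything
not shaped like one. decode_furl validates its input; as used here,
the last element of the tuple it returns is the swissnum.
Schematically (the fURL below is made up):

    furl = "pb://TUBID@tcp:example.invalid:1234/xyzzy"
    (_, _, swissnum) = decode_furl(furl)   # now swissnum == "xyzzy"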
--- src/allmydata/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index a31d05b9c..417dffed8 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -822,7 +822,7 @@ class _Client(node.Node, pollmixin.PollMixin): if anonymous_storage_enabled(self.config): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) - (_, _, swissnum) = furl.rpartition("/") + (_, _, swissnum) = decode_furl(furl) self.storage_nurls = self.tub.negotiationClass.add_storage_server( ss, swissnum.encode("ascii") ) From 5d53cd4a170cd7315b594102f705fcb9e7eec55e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:16:30 -0400 Subject: [PATCH 87/91] Nicer API. --- src/allmydata/node.py | 8 +++++--- src/allmydata/protocol_switch.py | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 597221e9b..d6cbc9e36 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -55,7 +55,7 @@ from allmydata.util.yamlutil import ( from . import ( __full_version__, ) -from .protocol_switch import support_foolscap_and_https +from .protocol_switch import create_tub_with_https_support def _common_valid_config(): @@ -708,8 +708,10 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han :param dict tub_options: every key-value pair in here will be set in the new Tub via `Tub.setOption` """ - tub = Tub(**kwargs) - support_foolscap_and_https(tub) + # We listen simultaneously for both Foolscap and HTTPS on the same port, + # so we have to create a special Foolscap Tub for that to work: + tub = create_tub_with_https_support(**kwargs) + for (name, value) in list(tub_options.items()): tub.setOption(name, value) handlers = default_connection_handlers.copy() diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index a17f3055c..2b4ce6da1 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -6,10 +6,11 @@ simple as possible, with no extra configuration needed. Listening on the same port means a user upgrading Tahoe-LAFS will automatically get HTTPS working with no additional changes. -Use ``support_foolscap_and_https()`` to create a new subclass for a ``Tub`` -instance, and then ``add_storage_server()`` on the resulting class to add the -relevant information for a storage server once it becomes available later in -the configuration process. +``create_tub_with_https_support()`` creates a new ``Tub`` that has its +``negotiationClass`` modified to be a new subclass tied to that specific +``Tub`` instance. Calling ``tub.negotiationClass.add_storage_server(...)`` +then adds relevant information for a storage server once it becomes available +later in the configuration process. """ from __future__ import annotations @@ -193,14 +194,17 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): self.__dict__ = protocol.__dict__ -def support_foolscap_and_https(tub: Tub): +def create_tub_with_https_support(**kwargs) -> Tub: """ - Create a new Foolscap-or-HTTPS protocol class for a specific ``Tub`` + Create a new Tub that also supports HTTPS. + + This involves creating a new protocol switch class for the specific ``Tub`` instance.
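    A usage sketch (``certFile`` is a standard Foolscap ``Tub`` argument;
    the names here are illustrative, not a fixed API):

        tub = create_tub_with_https_support(certFile="storage.pem")
        tub.negotiationClass.add_storage_server(storage_server, swissnum)

    after which the Tub's listener can speak either protocol on one port.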
""" - the_tub = tub + the_tub = Tub(**kwargs) class FoolscapOrHttpForTub(_FoolscapOrHttps): tub = the_tub - tub.negotiationClass = FoolscapOrHttpForTub # type: ignore + the_tub.negotiationClass = FoolscapOrHttpForTub # type: ignore + return the_tub From 3034f35c7b1e1748a5d4a76f73585ced7fc1e2ff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:21:54 -0400 Subject: [PATCH 88/91] Document type expectations. --- src/allmydata/storage/http_server.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 540675cc7..eefb9b906 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -4,7 +4,7 @@ HTTP server for storage. from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any, Callable, Union +from typing import Dict, List, Set, Tuple, Any, Callable, Union, cast from functools import wraps from base64 import b64decode import binascii @@ -19,6 +19,7 @@ from twisted.internet.interfaces import ( IStreamServerEndpoint, IPullProducer, ) +from twisted.internet.address import IPv4Address, IPv6Address from twisted.internet.defer import Deferred from twisted.internet.ssl import CertificateOptions, Certificate, PrivateCertificate from twisted.web.server import Site, Request @@ -911,9 +912,10 @@ def listen_tls( endpoint = _TLSEndpointWrapper.from_paths(endpoint, private_key_path, cert_path) def get_nurl(listening_port: IListeningPort) -> DecodedURL: + address = cast(Union[IPv4Address, IPv6Address], listening_port.getHost()) return build_nurl( hostname, - listening_port.getHost().port, + address.port, str(server._swissnum, "ascii"), load_pem_x509_certificate(cert_path.getContent()), ) From 58247799c1e5f12a2692be0dc72325484a38a6f6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:27:19 -0400 Subject: [PATCH 89/91] Fix remaining references to refactored-out-of-existence API. --- src/allmydata/protocol_switch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 2b4ce6da1..b0af84c33 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -53,13 +53,13 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): since these are created by Foolscap's ``Tub``, by setting this to be the tub's ``negotiationClass``. - Do not instantiate directly, use ``support_foolscap_and_https(tub)`` + Do not instantiate directly, use ``create_tub_with_https_support(...)`` instead. The way this class works is that a new subclass is created for a specific ``Tub`` instance. """ # These are class attributes; they will be set by - # support_foolscap_and_https() and add_storage_server(). + # create_tub_with_https_support() and add_storage_server(). # The Twisted HTTPS protocol factory wrapping the storage server HTTP API: https_factory: TLSMemoryBIOFactory From 795ec0b2dbc6e5f9d5de23fb64d8148b47025ccc Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:52:07 -0400 Subject: [PATCH 90/91] Fix flake8 issue. --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index d99831347..c8a9669cb 100644 --- a/setup.py +++ b/setup.py @@ -382,6 +382,9 @@ setup(name="tahoe-lafs", # also set in __init__.py ':sys_platform=="win32"': ["pywin32 != 226"], "test": [ "flake8", + # On Python 3.7, importlib_metadata v5 breaks flake8. 
+ # https://github.com/python/importlib_metadata/issues/407 + "importlib_metadata<5; python_version < '3.8'", # Pin a specific pyflakes so we don't have different folks # disagreeing on what is or is not a lint issue. We can bump # this version from time to time, but we will do it From c13be0c89b8df744ff46b1b163e4b9138451169c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 09:19:48 -0400 Subject: [PATCH 91/91] Try harder to cleanup. --- src/allmydata/test/test_storage_https.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/test/test_storage_https.py b/src/allmydata/test/test_storage_https.py index 3b41e8308..bacb40290 100644 --- a/src/allmydata/test/test_storage_https.py +++ b/src/allmydata/test/test_storage_https.py @@ -198,6 +198,10 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") + # We keep getting TLSMemoryBIOProtocol being left around, so try harder + # to wait for it to finish. + await deferLater(reactor, 0.001) + # A potential attack to test is a private key that doesn't match the # certificate... but OpenSSL (quite rightly) won't let you listen with that # so I don't know how to test that! See
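        # For illustration only (not a test in this suite): with pyOpenSSL
        # the mismatch is rejected up front, when key and certificate are
        # loaded together, rather than at handshake time. Hypothetical
        # file names:
        #
        #     from OpenSSL import SSL
        #     ctx = SSL.Context(SSL.TLSv1_2_METHOD)
        #     ctx.use_certificate_file("cert.pem")
        #     ctx.use_privatekey_file("mismatched_key.pem")
        #     ctx.check_privatekey()  # raises SSL.Error ("key values mismatch")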