From 113eeb0e5908887aac8b03bd59a23fdc6999a3c5 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 May 2022 10:21:55 -0400 Subject: [PATCH 001/289] News file. --- newsfragments/3891.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3891.minor diff --git a/newsfragments/3891.minor b/newsfragments/3891.minor new file mode 100644 index 000000000..e69de29bb From c1ce74f88d346d92299af11c11ab01789d368c4e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 May 2022 11:03:14 -0400 Subject: [PATCH 002/289] Ability to list shares, enabling more of IStorageClient to run over HTTP. --- docs/proposed/http-storage-node-protocol.rst | 2 +- src/allmydata/storage/http_client.py | 20 ++++++++++++ src/allmydata/storage/http_server.py | 14 ++++++++ src/allmydata/storage/server.py | 34 +++++++++++++------- src/allmydata/storage_client.py | 5 +-- src/allmydata/test/test_istorageserver.py | 1 - 6 files changed, 60 insertions(+), 16 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 3926d9f4a..693ce9290 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -738,7 +738,7 @@ Reading ``GET /v1/mutable/:storage_index/shares`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -Retrieve a list indicating all shares available for the indicated storage index. +Retrieve a set indicating all shares available for the indicated storage index. For example:: [1, 5] diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index da350e0c6..5920d5a5b 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -106,6 +106,11 @@ _SCHEMAS = { share_number = uint """ ), + "mutable_list_shares": Schema( + """ + response = #6.258([* uint]) + """ + ), } @@ -720,3 +725,18 @@ class StorageClientMutables: return read_share_chunk( self._client, "mutable", storage_index, share_number, offset, length ) + + @async_to_deferred + async def list_shares(self, storage_index: bytes) -> set[int]: + """ + List the share numbers for a given storage index. + """ + # TODO unit test all the things + url = self._client.relative_url( + "/v1/mutable/{}/shares".format(_encode_si(storage_index)) + ) + response = await self._client.request("GET", url) + if response.code == http.OK: + return await _decode_cbor(response, _SCHEMAS["mutable_list_shares"]) + else: + raise ClientException(response.code) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 0169d1463..0b407a1c4 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -645,6 +645,20 @@ class HTTPServer(object): ) return data + @_authorized_route( + _app, + set(), + "/v1/mutable//shares", + methods=["GET"], + ) + def list_mutable_shares(self, request, authorization, storage_index): + """List mutable shares for a storage index.""" + try: + shares = self._storage_server.list_mutable_shares(storage_index) + except KeyError: + raise _HTTPError(http.NOT_FOUND) + return self._send_encoded(request, shares) + @implementer(IStreamServerEndpoint) @attr.s diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 9d1a3d6a4..1a0255601 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -1,18 +1,9 @@ """ Ported to Python 3. """ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import bytes_to_native_str, PY2 -if PY2: - # Omit open() to get native behavior where open("w") always accepts native - # strings. Omit bytes so we don't leak future's custom bytes. - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, pow, round, super, dict, list, object, range, str, max, min # noqa: F401 -else: - from typing import Dict, Tuple +from __future__ import annotations +from future.utils import bytes_to_native_str +from typing import Dict, Tuple import os, re @@ -699,6 +690,25 @@ class StorageServer(service.MultiService): self) return share + def list_mutable_shares(self, storage_index) -> set[int]: + """List all share numbers for the given mutable. + + Raises ``KeyError`` if the storage index is not known. + """ + # TODO unit test + si_dir = storage_index_to_dir(storage_index) + # shares exist if there is a file for them + bucketdir = os.path.join(self.sharedir, si_dir) + if not os.path.isdir(bucketdir): + raise KeyError("Not found") + result = set() + for sharenum_s in os.listdir(bucketdir): + try: + result.add(int(sharenum_s)) + except ValueError: + continue + return result + def slot_readv(self, storage_index, shares, readv): start = self._clock.seconds() self.count("readv") diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 68164e697..8b2f68a9e 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1196,9 +1196,10 @@ class _HTTPStorageServer(object): mutable_client = StorageClientMutables(self._http_client) pending_reads = {} reads = {} - # TODO if shares list is empty, that means list all shares, so we need + # If shares list is empty, that means list all shares, so we need # to do a query to get that. - assert shares # TODO replace with call to list shares if and only if it's empty + if not shares: + shares = yield mutable_client.list_shares(storage_index) # Start all the queries in parallel: for share_number in shares: diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index e7b869713..d9fd13acb 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -1154,5 +1154,4 @@ class HTTPMutableAPIsTests( "test_add_lease_renewal", "test_add_new_lease", "test_advise_corrupt_share", - "test_slot_readv_no_shares", } From 852162ba0694f0405666d432d39b464f646eeca0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 May 2022 11:03:35 -0400 Subject: [PATCH 003/289] More accurate docs. --- src/allmydata/storage/http_client.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 5920d5a5b..2db28dc72 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -380,16 +380,14 @@ def read_share_chunk( """ Download a chunk of data from a share. - TODO https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3857 Failed - downloads should be transparently retried and redownloaded by the - implementation a few times so that if a failure percolates up, the - caller can assume the failure isn't a short-term blip. + TODO https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3857 Failed downloads + should be transparently retried and redownloaded by the implementation a + few times so that if a failure percolates up, the caller can assume the + failure isn't a short-term blip. - NOTE: the underlying HTTP protocol is much more flexible than this API, - so a future refactor may expand this in order to simplify the calling - code and perhaps download data more efficiently. But then again maybe - the HTTP protocol will be simplified, see - https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3777 + NOTE: the underlying HTTP protocol is somewhat more flexible than this API, + insofar as it doesn't always require a range. In practice a range is + always provided by the current callers. """ url = client.relative_url( "/v1/{}/{}/{}".format(share_type, _encode_si(storage_index), share_number) @@ -717,7 +715,7 @@ class StorageClientMutables: share_number: int, offset: int, length: int, - ) -> bytes: + ) -> Deferred[bytes]: """ Download a chunk of data from a share. """ From 06029d2878b6ad6edc67ae79dc1f59124c6934f0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 4 May 2022 11:25:13 -0400 Subject: [PATCH 004/289] Another end-to-end test passing (albeit with ugly implementation). --- src/allmydata/storage/http_client.py | 6 +++++ src/allmydata/storage/http_server.py | 33 ++++++++++++++--------- src/allmydata/test/test_istorageserver.py | 1 - 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 2db28dc72..0229bef03 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -706,6 +706,12 @@ class StorageClientMutables: if response.code == http.OK: result = await _decode_cbor(response, _SCHEMAS["mutable_read_test_write"]) return ReadTestWriteResult(success=result["success"], reads=result["data"]) + elif response.code == http.UNAUTHORIZED: + # TODO mabye we can fix this to be nicer at some point? Custom + # exception? + from foolscap.api import RemoteException + + raise RemoteException("Authorization failed") else: raise ClientException(response.code, (await response.content())) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 0b407a1c4..748790a72 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -46,6 +46,7 @@ from .common import si_a2b from .immutable import BucketWriter, ConflictingWriteError from ..util.hashutil import timing_safe_compare from ..util.base32 import rfc3548_alphabet +from allmydata.interfaces import BadWriteEnablerError class ClientSecretsException(Exception): @@ -587,19 +588,25 @@ class HTTPServer(object): authorization[Secrets.LEASE_RENEW], authorization[Secrets.LEASE_CANCEL], ) - success, read_data = self._storage_server.slot_testv_and_readv_and_writev( - storage_index, - secrets, - { - k: ( - [(d["offset"], d["size"], b"eq", d["specimen"]) for d in v["test"]], - [(d["offset"], d["data"]) for d in v["write"]], - v["new-length"], - ) - for (k, v) in rtw_request["test-write-vectors"].items() - }, - [(d["offset"], d["size"]) for d in rtw_request["read-vector"]], - ) + try: + success, read_data = self._storage_server.slot_testv_and_readv_and_writev( + storage_index, + secrets, + { + k: ( + [ + (d["offset"], d["size"], b"eq", d["specimen"]) + for d in v["test"] + ], + [(d["offset"], d["data"]) for d in v["write"]], + v["new-length"], + ) + for (k, v) in rtw_request["test-write-vectors"].items() + }, + [(d["offset"], d["size"]) for d in rtw_request["read-vector"]], + ) + except BadWriteEnablerError: + raise _HTTPError(http.UNAUTHORIZED) return self._send_encoded(request, {"success": success, "data": read_data}) @_authorized_route( diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index d9fd13acb..a3e75bbac 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -1150,7 +1150,6 @@ class HTTPMutableAPIsTests( # TODO will be implemented in later tickets SKIP_TESTS = { - "test_STARAW_write_enabler_must_match", "test_add_lease_renewal", "test_add_new_lease", "test_advise_corrupt_share", From 2833bec80e7e6ff069b4b6eee890f99942c3dfc4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 5 May 2022 12:04:45 -0400 Subject: [PATCH 005/289] Unit test the new storage server backend API. --- src/allmydata/storage/server.py | 1 - src/allmydata/test/test_storage.py | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 1a0255601..b46303cd8 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -695,7 +695,6 @@ class StorageServer(service.MultiService): Raises ``KeyError`` if the storage index is not known. """ - # TODO unit test si_dir = storage_index_to_dir(storage_index) # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index b37f74c24..8f1ece401 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1315,6 +1315,31 @@ class MutableServer(unittest.TestCase): self.failUnless(isinstance(readv_data, dict)) self.failUnlessEqual(len(readv_data), 0) + def test_list_mutable_shares(self): + """ + ``StorageServer.list_mutable_shares()`` returns a set of share numbers + for the given storage index, or raises ``KeyError`` if it does not exist at all. + """ + ss = self.create("test_list_mutable_shares") + + # Initially, nothing exists: + with self.assertRaises(KeyError): + ss.list_mutable_shares(b"si1") + + self.allocate(ss, b"si1", b"we1", b"le1", [0, 1, 4, 2], 12) + shares0_1_2_4 = ss.list_mutable_shares(b"si1") + + # Remove share 2, by setting size to 0: + secrets = (self.write_enabler(b"we1"), + self.renew_secret(b"le1"), + self.cancel_secret(b"le1")) + ss.slot_testv_and_readv_and_writev(b"si1", secrets, {2: ([], [], 0)}, []) + shares0_1_4 = ss.list_mutable_shares(b"si1") + self.assertEqual( + (shares0_1_2_4, shares0_1_4), + ({0, 1, 2, 4}, {0, 1, 4}) + ) + def test_bad_magic(self): ss = self.create("test_bad_magic") self.allocate(ss, b"si1", b"we1", next(self._lease_secret), set([0]), 10) From b3fed56c00d03599b4a8479e5de0a36c696c7a97 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 5 May 2022 12:11:09 -0400 Subject: [PATCH 006/289] Move Foolscap compatibility to a better place. --- src/allmydata/storage/http_client.py | 6 ------ src/allmydata/storage_client.py | 16 +++++++++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 0229bef03..2db28dc72 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -706,12 +706,6 @@ class StorageClientMutables: if response.code == http.OK: result = await _decode_cbor(response, _SCHEMAS["mutable_read_test_write"]) return ReadTestWriteResult(success=result["success"], reads=result["data"]) - elif response.code == http.UNAUTHORIZED: - # TODO mabye we can fix this to be nicer at some point? Custom - # exception? - from foolscap.api import RemoteException - - raise RemoteException("Authorization failed") else: raise ClientException(response.code, (await response.content())) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 8b2f68a9e..cd489a307 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -50,6 +50,7 @@ from zope.interface import ( Interface, implementer, ) +from twisted.web import http from twisted.internet import defer from twisted.application import service from twisted.plugin import ( @@ -78,7 +79,7 @@ from allmydata.util.dictutil import BytesKeyDict, UnicodeKeyDict from allmydata.storage.http_client import ( StorageClient, StorageClientImmutables, StorageClientGeneral, ClientException as HTTPClientException, StorageClientMutables, - ReadVector, TestWriteVectors, WriteVector, TestVector + ReadVector, TestWriteVectors, WriteVector, TestVector, ClientException ) @@ -1247,8 +1248,13 @@ class _HTTPStorageServer(object): ReadVector(offset=offset, size=size) for (offset, size) in r_vector ] - client_result = yield mutable_client.read_test_write_chunks( - storage_index, we_secret, lr_secret, lc_secret, client_tw_vectors, - client_read_vectors, - ) + try: + client_result = yield mutable_client.read_test_write_chunks( + storage_index, we_secret, lr_secret, lc_secret, client_tw_vectors, + client_read_vectors, + ) + except ClientException as e: + if e.code == http.UNAUTHORIZED: + raise RemoteException("Unauthorized write, possibly you passed the wrong write enabler?") + raise return (client_result.success, client_result.reads) From 5b0762d3a3a8fa1eb98aa6cd3b5b4d14e53047a3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 10 May 2022 13:59:58 -0400 Subject: [PATCH 007/289] Workaround for autobahn issues. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c84d0ecde..2b4fd6988 100644 --- a/setup.py +++ b/setup.py @@ -114,7 +114,7 @@ install_requires = [ "attrs >= 18.2.0", # WebSocket library for twisted and asyncio - "autobahn >= 19.5.2", + "autobahn < 22.4.1", # remove this when https://github.com/crossbario/autobahn-python/issues/1566 is fixed # Support for Python 3 transition "future >= 0.18.2", From 6f5a0e43ebceb730c97598b4fbe49f65f052e44b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 10:41:36 -0400 Subject: [PATCH 008/289] Implement advise_corrupt_share for mutables. --- src/allmydata/storage/http_client.py | 51 ++++++++++++++++------- src/allmydata/storage/http_server.py | 20 +++++++++ src/allmydata/storage_client.py | 12 +++--- src/allmydata/test/test_istorageserver.py | 1 - 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 2db28dc72..f39ed7d1a 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -406,6 +406,30 @@ def read_share_chunk( raise ClientException(response.code) +@async_to_deferred +async def advise_corrupt_share( + client: StorageClient, + share_type: str, + storage_index: bytes, + share_number: int, + reason: str, +): + assert isinstance(reason, str) + url = client.relative_url( + "/v1/{}/{}/{}/corrupt".format( + share_type, _encode_si(storage_index), share_number + ) + ) + message = {"reason": reason} + response = await client.request("POST", url, message_to_serialize=message) + if response.code == http.OK: + return + else: + raise ClientException( + response.code, + ) + + @define class StorageClientImmutables(object): """ @@ -579,7 +603,6 @@ class StorageClientImmutables(object): else: raise ClientException(response.code) - @inlineCallbacks def advise_corrupt_share( self, storage_index: bytes, @@ -587,20 +610,9 @@ class StorageClientImmutables(object): reason: str, ): """Indicate a share has been corrupted, with a human-readable message.""" - assert isinstance(reason, str) - url = self._client.relative_url( - "/v1/immutable/{}/{}/corrupt".format( - _encode_si(storage_index), share_number - ) + return advise_corrupt_share( + self._client, "immutable", storage_index, share_number, reason ) - message = {"reason": reason} - response = yield self._client.request("POST", url, message_to_serialize=message) - if response.code == http.OK: - return - else: - raise ClientException( - response.code, - ) @frozen @@ -738,3 +750,14 @@ class StorageClientMutables: return await _decode_cbor(response, _SCHEMAS["mutable_list_shares"]) else: raise ClientException(response.code) + + def advise_corrupt_share( + self, + storage_index: bytes, + share_number: int, + reason: str, + ): + """Indicate a share has been corrupted, with a human-readable message.""" + return advise_corrupt_share( + self._client, "mutable", storage_index, share_number, reason + ) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 748790a72..102a33e90 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -666,6 +666,26 @@ class HTTPServer(object): raise _HTTPError(http.NOT_FOUND) return self._send_encoded(request, shares) + @_authorized_route( + _app, + set(), + "/v1/mutable///corrupt", + methods=["POST"], + ) + def advise_corrupt_share_mutable( + self, request, authorization, storage_index, share_number + ): + """Indicate that given share is corrupt, with a text reason.""" + # TODO unit test all the paths + if not self._storage_server._share_exists(storage_index, share_number): + raise _HTTPError(http.NOT_FOUND) + + info = self._read_encoded(request, _SCHEMAS["advise_corrupt_share"]) + self._storage_server.advise_corrupt_share( + b"mutable", storage_index, share_number, info["reason"].encode("utf-8") + ) + return b"" + @implementer(IStreamServerEndpoint) @attr.s diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index cd489a307..c83527600 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1185,12 +1185,14 @@ class _HTTPStorageServer(object): reason: bytes ): if share_type == b"immutable": - imm_client = StorageClientImmutables(self._http_client) - return imm_client.advise_corrupt_share( - storage_index, shnum, str(reason, "utf-8", errors="backslashreplace") - ) + client = StorageClientImmutables(self._http_client) + elif share_type == b"mutable": + client = StorageClientMutables(self._http_client) else: - raise NotImplementedError() # future tickets + raise ValueError("Unknown share type") + return client.advise_corrupt_share( + storage_index, shnum, str(reason, "utf-8", errors="backslashreplace") + ) @defer.inlineCallbacks def slot_readv(self, storage_index, shares, readv): diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index a3e75bbac..70543cbf0 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -1152,5 +1152,4 @@ class HTTPMutableAPIsTests( SKIP_TESTS = { "test_add_lease_renewal", "test_add_new_lease", - "test_advise_corrupt_share", } From 7ae682af27d878f7b07e4a0e533efe105348da95 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 10:41:56 -0400 Subject: [PATCH 009/289] News file. --- newsfragments/3893.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3893.minor diff --git a/newsfragments/3893.minor b/newsfragments/3893.minor new file mode 100644 index 000000000..e69de29bb From 4afe3eb224d6f26ac768aaccbca68c69035d57d4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 10:58:13 -0400 Subject: [PATCH 010/289] Clarify sets vs lists some more. --- docs/proposed/http-storage-node-protocol.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 693ce9290..7e0b4a542 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -350,8 +350,10 @@ Because of the simple types used throughout and the equivalence described in `RFC 7049`_ these examples should be representative regardless of which of these two encodings is chosen. +The one exception is sets. For CBOR messages, any sequence that is semantically a set (i.e. no repeated values allowed, order doesn't matter, and elements are hashable in Python) should be sent as a set. Tag 6.258 is used to indicate sets in CBOR; see `the CBOR registry `_ for more details. +Sets will be represented as JSON lists in examples because JSON doesn't support sets. HTTP Design ~~~~~~~~~~~ @@ -739,7 +741,7 @@ Reading !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a set indicating all shares available for the indicated storage index. -For example:: +For example (this is shown as list, since it will be list for JSON, but will be set for CBOR):: [1, 5] From 07e16b80b5df22a5620d390cb34032a55e62e795 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 11:00:05 -0400 Subject: [PATCH 011/289] Better name. --- src/allmydata/storage/http_server.py | 4 ++-- src/allmydata/storage/server.py | 2 +- src/allmydata/test/test_storage.py | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 748790a72..96e906e43 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -658,10 +658,10 @@ class HTTPServer(object): "/v1/mutable//shares", methods=["GET"], ) - def list_mutable_shares(self, request, authorization, storage_index): + def enumerate_mutable_shares(self, request, authorization, storage_index): """List mutable shares for a storage index.""" try: - shares = self._storage_server.list_mutable_shares(storage_index) + shares = self._storage_server.enumerate_mutable_shares(storage_index) except KeyError: raise _HTTPError(http.NOT_FOUND) return self._send_encoded(request, shares) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index b46303cd8..ab7947bf9 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -690,7 +690,7 @@ class StorageServer(service.MultiService): self) return share - def list_mutable_shares(self, storage_index) -> set[int]: + def enumerate_mutable_shares(self, storage_index) -> set[int]: """List all share numbers for the given mutable. Raises ``KeyError`` if the storage index is not known. diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 8f1ece401..9bc218fa6 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1315,26 +1315,26 @@ class MutableServer(unittest.TestCase): self.failUnless(isinstance(readv_data, dict)) self.failUnlessEqual(len(readv_data), 0) - def test_list_mutable_shares(self): + def test_enumerate_mutable_shares(self): """ - ``StorageServer.list_mutable_shares()`` returns a set of share numbers + ``StorageServer.enumerate_mutable_shares()`` returns a set of share numbers for the given storage index, or raises ``KeyError`` if it does not exist at all. """ - ss = self.create("test_list_mutable_shares") + ss = self.create("test_enumerate_mutable_shares") # Initially, nothing exists: with self.assertRaises(KeyError): - ss.list_mutable_shares(b"si1") + ss.enumerate_mutable_shares(b"si1") self.allocate(ss, b"si1", b"we1", b"le1", [0, 1, 4, 2], 12) - shares0_1_2_4 = ss.list_mutable_shares(b"si1") + shares0_1_2_4 = ss.enumerate_mutable_shares(b"si1") # Remove share 2, by setting size to 0: secrets = (self.write_enabler(b"we1"), self.renew_secret(b"le1"), self.cancel_secret(b"le1")) ss.slot_testv_and_readv_and_writev(b"si1", secrets, {2: ([], [], 0)}, []) - shares0_1_4 = ss.list_mutable_shares(b"si1") + shares0_1_4 = ss.enumerate_mutable_shares(b"si1") self.assertEqual( (shares0_1_2_4, shares0_1_4), ({0, 1, 2, 4}, {0, 1, 4}) From 6d412a017c34fc6f6528c89b1443d9bdf7caee64 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 11:00:46 -0400 Subject: [PATCH 012/289] Type annotation. --- src/allmydata/storage/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index ab7947bf9..f1b835780 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -690,7 +690,7 @@ class StorageServer(service.MultiService): self) return share - def enumerate_mutable_shares(self, storage_index) -> set[int]: + def enumerate_mutable_shares(self, storage_index: bytes) -> set[int]: """List all share numbers for the given mutable. Raises ``KeyError`` if the storage index is not known. From 4b62ec082bed7ecc33612741dfbffc16868ca3ff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 11:11:24 -0400 Subject: [PATCH 013/289] Match Foolscap behavior for slot_readv of unknown storage index. --- src/allmydata/storage_client.py | 8 +++++++- src/allmydata/test/test_istorageserver.py | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index cd489a307..83a1233f5 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1200,7 +1200,13 @@ class _HTTPStorageServer(object): # If shares list is empty, that means list all shares, so we need # to do a query to get that. if not shares: - shares = yield mutable_client.list_shares(storage_index) + try: + shares = yield mutable_client.list_shares(storage_index) + except ClientException as e: + if e.code == http.NOT_FOUND: + shares = set() + else: + raise # Start all the queries in parallel: for share_number in shares: diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index a3e75bbac..66535ddda 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -854,6 +854,22 @@ class IStorageServerMutableAPIsTestsMixin(object): {0: [b"abcdefg"], 1: [b"0123456"], 2: [b"9876543"]}, ) + @inlineCallbacks + def test_slot_readv_unknown_storage_index(self): + """ + With unknown storage index, ``IStorageServer.slot_readv()`` TODO. + """ + storage_index = new_storage_index() + reads = yield self.storage_client.slot_readv( + storage_index, + shares=[], + readv=[(0, 7)], + ) + self.assertEqual( + reads, + {}, + ) + @inlineCallbacks def create_slot(self): """Create a slot with sharenum 0.""" From 457db8f992c62e8f5c5bc4cdcb6e99f097062f08 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 11:17:57 -0400 Subject: [PATCH 014/289] Get rid of the "no such storage index" edge case, since it's not really necessary. --- src/allmydata/storage/http_server.py | 5 +---- src/allmydata/storage/server.py | 7 ++----- src/allmydata/storage_client.py | 8 +------- src/allmydata/test/test_storage.py | 12 ++++++------ 4 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 96e906e43..a1641f563 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -660,10 +660,7 @@ class HTTPServer(object): ) def enumerate_mutable_shares(self, request, authorization, storage_index): """List mutable shares for a storage index.""" - try: - shares = self._storage_server.enumerate_mutable_shares(storage_index) - except KeyError: - raise _HTTPError(http.NOT_FOUND) + shares = self._storage_server.enumerate_mutable_shares(storage_index) return self._send_encoded(request, shares) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index f1b835780..bcf44dc30 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -691,15 +691,12 @@ class StorageServer(service.MultiService): return share def enumerate_mutable_shares(self, storage_index: bytes) -> set[int]: - """List all share numbers for the given mutable. - - Raises ``KeyError`` if the storage index is not known. - """ + """Return all share numbers for the given mutable.""" si_dir = storage_index_to_dir(storage_index) # shares exist if there is a file for them bucketdir = os.path.join(self.sharedir, si_dir) if not os.path.isdir(bucketdir): - raise KeyError("Not found") + return set() result = set() for sharenum_s in os.listdir(bucketdir): try: diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 83a1233f5..cd489a307 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1200,13 +1200,7 @@ class _HTTPStorageServer(object): # If shares list is empty, that means list all shares, so we need # to do a query to get that. if not shares: - try: - shares = yield mutable_client.list_shares(storage_index) - except ClientException as e: - if e.code == http.NOT_FOUND: - shares = set() - else: - raise + shares = yield mutable_client.list_shares(storage_index) # Start all the queries in parallel: for share_number in shares: diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 9bc218fa6..65d09de25 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1317,14 +1317,14 @@ class MutableServer(unittest.TestCase): def test_enumerate_mutable_shares(self): """ - ``StorageServer.enumerate_mutable_shares()`` returns a set of share numbers - for the given storage index, or raises ``KeyError`` if it does not exist at all. + ``StorageServer.enumerate_mutable_shares()`` returns a set of share + numbers for the given storage index, or an empty set if it does not + exist at all. """ ss = self.create("test_enumerate_mutable_shares") # Initially, nothing exists: - with self.assertRaises(KeyError): - ss.enumerate_mutable_shares(b"si1") + empty = ss.enumerate_mutable_shares(b"si1") self.allocate(ss, b"si1", b"we1", b"le1", [0, 1, 4, 2], 12) shares0_1_2_4 = ss.enumerate_mutable_shares(b"si1") @@ -1336,8 +1336,8 @@ class MutableServer(unittest.TestCase): ss.slot_testv_and_readv_and_writev(b"si1", secrets, {2: ([], [], 0)}, []) shares0_1_4 = ss.enumerate_mutable_shares(b"si1") self.assertEqual( - (shares0_1_2_4, shares0_1_4), - ({0, 1, 2, 4}, {0, 1, 4}) + (empty, shares0_1_2_4, shares0_1_4), + (set(), {0, 1, 2, 4}, {0, 1, 4}) ) def test_bad_magic(self): From 821bac3ddf4bc829d4a0724404242974fa2f1d0a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 11:50:01 -0400 Subject: [PATCH 015/289] Test another lease edge case. --- src/allmydata/storage_client.py | 16 ++++++++++++---- src/allmydata/test/test_istorageserver.py | 15 +++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index c83527600..e8d0e003a 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -76,6 +76,7 @@ from allmydata.util.observer import ObserverList from allmydata.util.rrefutil import add_version_to_remote_reference from allmydata.util.hashutil import permute_server_hash from allmydata.util.dictutil import BytesKeyDict, UnicodeKeyDict +from allmydata.util.deferredutil import async_to_deferred from allmydata.storage.http_client import ( StorageClient, StorageClientImmutables, StorageClientGeneral, ClientException as HTTPClientException, StorageClientMutables, @@ -1166,16 +1167,23 @@ class _HTTPStorageServer(object): for share_num in share_numbers }) - def add_lease( + @async_to_deferred + async def add_lease( self, storage_index, renew_secret, cancel_secret ): immutable_client = StorageClientImmutables(self._http_client) - return immutable_client.add_or_renew_lease( - storage_index, renew_secret, cancel_secret - ) + try: + await immutable_client.add_or_renew_lease( + storage_index, renew_secret, cancel_secret + ) + except ClientException as e: + if e.code == http.NOT_FOUND: + # Silently do nothing, as is the case for the Foolscap client + return + raise def advise_corrupt_share( self, diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index abb2e0fc4..cee80f8fb 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -459,6 +459,21 @@ class IStorageServerImmutableAPIsTestsMixin(object): lease.get_expiration_time() - self.fake_time() > (31 * 24 * 60 * 60 - 10) ) + @inlineCallbacks + def test_add_lease_non_existent(self): + """ + If the storage index doesn't exist, adding the lease silently does nothing. + """ + storage_index = new_storage_index() + self.assertEqual(list(self.server.get_leases(storage_index)), []) + + renew_secret = new_secret() + cancel_secret = new_secret() + + # Add a lease: + yield self.storage_client.add_lease(storage_index, renew_secret, cancel_secret) + self.assertEqual(list(self.server.get_leases(storage_index)), []) + @inlineCallbacks def test_add_lease_renewal(self): """ From b8735c79daefb751b4d81ba7631a81b37904b732 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 11:50:29 -0400 Subject: [PATCH 016/289] Fix docstring. --- src/allmydata/test/test_istorageserver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index cee80f8fb..c0dd50590 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -872,7 +872,8 @@ class IStorageServerMutableAPIsTestsMixin(object): @inlineCallbacks def test_slot_readv_unknown_storage_index(self): """ - With unknown storage index, ``IStorageServer.slot_readv()`` TODO. + With unknown storage index, ``IStorageServer.slot_readv()`` returns + empty dict. """ storage_index = new_storage_index() reads = yield self.storage_client.slot_readv( From f3cf13154da2c03e39fa3249384ec6e01ef90735 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 11 May 2022 12:00:27 -0400 Subject: [PATCH 017/289] Setup HTTP lease APIs for immutables too. --- src/allmydata/storage/http_client.py | 50 +++++++++++------------ src/allmydata/storage/http_server.py | 2 +- src/allmydata/storage_client.py | 2 +- src/allmydata/test/test_istorageserver.py | 6 --- src/allmydata/test/test_storage_http.py | 7 ++-- 5 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index f39ed7d1a..167d2394a 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -355,6 +355,31 @@ class StorageClientGeneral(object): decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"]) returnValue(decoded_response) + @inlineCallbacks + def add_or_renew_lease( + self, storage_index: bytes, renew_secret: bytes, cancel_secret: bytes + ): + """ + Add or renew a lease. + + If the renewal secret matches an existing lease, it is renewed. + Otherwise a new lease is added. + """ + url = self._client.relative_url( + "/v1/lease/{}".format(_encode_si(storage_index)) + ) + response = yield self._client.request( + "PUT", + url, + lease_renew_secret=renew_secret, + lease_cancel_secret=cancel_secret, + ) + + if response.code == http.NO_CONTENT: + return + else: + raise ClientException(response.code) + @define class UploadProgress(object): @@ -578,31 +603,6 @@ class StorageClientImmutables(object): else: raise ClientException(response.code) - @inlineCallbacks - def add_or_renew_lease( - self, storage_index: bytes, renew_secret: bytes, cancel_secret: bytes - ): - """ - Add or renew a lease. - - If the renewal secret matches an existing lease, it is renewed. - Otherwise a new lease is added. - """ - url = self._client.relative_url( - "/v1/lease/{}".format(_encode_si(storage_index)) - ) - response = yield self._client.request( - "PUT", - url, - lease_renew_secret=renew_secret, - lease_cancel_secret=cancel_secret, - ) - - if response.code == http.NO_CONTENT: - return - else: - raise ClientException(response.code) - def advise_corrupt_share( self, storage_index: bytes, diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index db73b6a86..709c1fda5 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -539,7 +539,7 @@ class HTTPServer(object): ) def add_or_renew_lease(self, request, authorization, storage_index): """Update the lease for an immutable share.""" - if not self._storage_server.get_buckets(storage_index): + if not list(self._storage_server._get_bucket_shares(storage_index)): raise _HTTPError(http.NOT_FOUND) # Checking of the renewal secret is done by the backend. diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index e8d0e003a..c529c4513 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1174,7 +1174,7 @@ class _HTTPStorageServer(object): renew_secret, cancel_secret ): - immutable_client = StorageClientImmutables(self._http_client) + immutable_client = StorageClientGeneral(self._http_client) try: await immutable_client.add_or_renew_lease( storage_index, renew_secret, cancel_secret diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index c0dd50590..39675336f 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -1179,9 +1179,3 @@ class HTTPMutableAPIsTests( _HTTPMixin, IStorageServerMutableAPIsTestsMixin, AsyncTestCase ): """HTTP-specific tests for mutable ``IStorageServer`` APIs.""" - - # TODO will be implemented in later tickets - SKIP_TESTS = { - "test_add_lease_renewal", - "test_add_new_lease", - } diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index df781012e..fcc2401f2 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -448,6 +448,7 @@ class ImmutableHTTPAPITests(SyncTestCase): super(ImmutableHTTPAPITests, self).setUp() self.http = self.useFixture(HttpTestFixture()) self.imm_client = StorageClientImmutables(self.http.client) + self.general_client = StorageClientGeneral(self.http.client) def create_upload(self, share_numbers, length): """ @@ -1081,7 +1082,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # We renew the lease: result_of( - self.imm_client.add_or_renew_lease( + self.general_client.add_or_renew_lease( storage_index, lease_secret, lease_secret ) ) @@ -1092,7 +1093,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # We create a new lease: lease_secret2 = urandom(32) result_of( - self.imm_client.add_or_renew_lease( + self.general_client.add_or_renew_lease( storage_index, lease_secret2, lease_secret2 ) ) @@ -1108,7 +1109,7 @@ class ImmutableHTTPAPITests(SyncTestCase): storage_index = urandom(16) secret = b"A" * 32 with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of(self.imm_client.add_or_renew_lease(storage_index, secret, secret)) + result_of(self.general_client.add_or_renew_lease(storage_index, secret, secret)) def test_advise_corrupt_share(self): """ From a54b443f9d2658cb6e196570a7a74681a8bec44d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 12 May 2022 09:44:30 -0400 Subject: [PATCH 018/289] It's not an immutable client anymore. --- src/allmydata/storage_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index c529c4513..0f66e8e4a 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1174,9 +1174,9 @@ class _HTTPStorageServer(object): renew_secret, cancel_secret ): - immutable_client = StorageClientGeneral(self._http_client) + client = StorageClientGeneral(self._http_client) try: - await immutable_client.add_or_renew_lease( + await client.add_or_renew_lease( storage_index, renew_secret, cancel_secret ) except ClientException as e: From b0b67826e8c9a026879c68c04c13d2cce9a6466e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 13 May 2022 12:58:55 -0400 Subject: [PATCH 019/289] More verbose output is helpful when debugging. --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 859cf18e0..fc95a0469 100644 --- a/tox.ini +++ b/tox.ini @@ -97,7 +97,7 @@ setenv = COVERAGE_PROCESS_START=.coveragerc commands = # NOTE: 'run with "py.test --keep-tempdir -s -v integration/" to debug failures' - py.test --timeout=1800 --coverage -v {posargs:integration} + py.test --timeout=1800 --coverage -s -v {posargs:integration} coverage combine coverage report From 20b021809c0a2cf3bd2abf5991de5638b48134b9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 13 May 2022 12:59:04 -0400 Subject: [PATCH 020/289] Fix(?) the intermittently failing test. --- integration/test_tor.py | 6 +++++- newsfragments/3895.minor | 0 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 newsfragments/3895.minor diff --git a/integration/test_tor.py b/integration/test_tor.py index b0419f0d2..5b701287c 100644 --- a/integration/test_tor.py +++ b/integration/test_tor.py @@ -21,7 +21,8 @@ from . import util from twisted.python.filepath import ( FilePath, ) - +from twisted.internet.task import deferLater +from twisted.internet import reactor from allmydata.test.common import ( write_introducer, ) @@ -68,6 +69,9 @@ def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_ne cap = proto.output.getvalue().strip().split()[-1] print("TEH CAP!", cap) + # For some reason a wait is needed, or sometimes the get fails... + yield deferLater(reactor, 2, lambda: None) + proto = util._CollectOutputProtocol(capture_stderr=False) reactor.spawnProcess( proto, diff --git a/newsfragments/3895.minor b/newsfragments/3895.minor new file mode 100644 index 000000000..e69de29bb From 757b4492d75c899d21809ff51f3383189c7eb3c7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 13 May 2022 13:29:08 -0400 Subject: [PATCH 021/289] A more semantically correct fix. --- integration/test_tor.py | 16 ++++++++-------- integration/util.py | 9 +++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/integration/test_tor.py b/integration/test_tor.py index 5b701287c..d17e0f5cf 100644 --- a/integration/test_tor.py +++ b/integration/test_tor.py @@ -21,8 +21,7 @@ from . import util from twisted.python.filepath import ( FilePath, ) -from twisted.internet.task import deferLater -from twisted.internet import reactor + from allmydata.test.common import ( write_introducer, ) @@ -41,8 +40,11 @@ if PY2: @pytest_twisted.inlineCallbacks def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl): - yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) - yield _create_anonymous_node(reactor, 'dave', 8009, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) + carol = yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) + dave = yield _create_anonymous_node(reactor, 'dave', 8009, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) + util.await_client_ready(carol, expected_number_of_servers=2) + util.await_client_ready(dave, expected_number_of_servers=2) + # ensure both nodes are connected to "a grid" by uploading # something via carol, and retrieve it using dave. gold_path = join(temp_dir, "gold") @@ -69,9 +71,6 @@ def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_ne cap = proto.output.getvalue().strip().split()[-1] print("TEH CAP!", cap) - # For some reason a wait is needed, or sometimes the get fails... - yield deferLater(reactor, 2, lambda: None) - proto = util._CollectOutputProtocol(capture_stderr=False) reactor.spawnProcess( proto, @@ -147,5 +146,6 @@ shares.total = 2 f.write(node_config) print("running") - yield util._run_node(reactor, node_dir.path, request, None) + result = yield util._run_node(reactor, node_dir.path, request, None) print("okay, launched") + return result diff --git a/integration/util.py b/integration/util.py index 7c7a1efd2..0ec824f82 100644 --- a/integration/util.py +++ b/integration/util.py @@ -482,14 +482,15 @@ def web_post(tahoe, uri_fragment, **kwargs): return resp.content -def await_client_ready(tahoe, timeout=10, liveness=60*2): +def await_client_ready(tahoe, timeout=10, liveness=60*2, expected_number_of_servers=1): """ Uses the status API to wait for a client-type node (in `tahoe`, a `TahoeProcess` instance usually from a fixture e.g. `alice`) to be 'ready'. A client is deemed ready if: - it answers `http:///statistics/?t=json/` - - there is at least one storage-server connected + - there is at least one storage-server connected (configurable via + ``expected_number_of_servers``) - every storage-server has a "last_received_data" and it is within the last `liveness` seconds @@ -506,8 +507,8 @@ def await_client_ready(tahoe, timeout=10, liveness=60*2): time.sleep(1) continue - if len(js['servers']) == 0: - print("waiting because no servers at all") + if len(js['servers']) != expected_number_of_servers: + print("waiting because insufficient servers") time.sleep(1) continue server_times = [ From f752f547ba50e283a13abac8b9764cef305ad2a0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 13 May 2022 13:30:47 -0400 Subject: [PATCH 022/289] More servers is fine. --- integration/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integration/util.py b/integration/util.py index 0ec824f82..ad9249e45 100644 --- a/integration/util.py +++ b/integration/util.py @@ -482,7 +482,7 @@ def web_post(tahoe, uri_fragment, **kwargs): return resp.content -def await_client_ready(tahoe, timeout=10, liveness=60*2, expected_number_of_servers=1): +def await_client_ready(tahoe, timeout=10, liveness=60*2, minimum_number_of_servers=1): """ Uses the status API to wait for a client-type node (in `tahoe`, a `TahoeProcess` instance usually from a fixture e.g. `alice`) to be @@ -490,7 +490,7 @@ def await_client_ready(tahoe, timeout=10, liveness=60*2, expected_number_of_serv - it answers `http:///statistics/?t=json/` - there is at least one storage-server connected (configurable via - ``expected_number_of_servers``) + ``minimum_number_of_servers``) - every storage-server has a "last_received_data" and it is within the last `liveness` seconds @@ -507,7 +507,7 @@ def await_client_ready(tahoe, timeout=10, liveness=60*2, expected_number_of_serv time.sleep(1) continue - if len(js['servers']) != expected_number_of_servers: + if len(js['servers']) < minimum_number_of_servers: print("waiting because insufficient servers") time.sleep(1) continue From 69f1244c5a85914535008911b231b1483fdee953 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 13 May 2022 13:42:10 -0400 Subject: [PATCH 023/289] Fix keyword argument name. --- integration/test_tor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/test_tor.py b/integration/test_tor.py index d17e0f5cf..c78fa8098 100644 --- a/integration/test_tor.py +++ b/integration/test_tor.py @@ -42,8 +42,8 @@ if PY2: def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl): carol = yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) dave = yield _create_anonymous_node(reactor, 'dave', 8009, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) - util.await_client_ready(carol, expected_number_of_servers=2) - util.await_client_ready(dave, expected_number_of_servers=2) + util.await_client_ready(carol, minimum_number_of_servers=2) + util.await_client_ready(dave, minimum_number_of_servers=2) # ensure both nodes are connected to "a grid" by uploading # something via carol, and retrieve it using dave. From 3abf992321c62728cf194090380dc46c32dc0156 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 13 May 2022 14:05:53 -0400 Subject: [PATCH 024/289] Autobahn regression workaround. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c84d0ecde..2b4fd6988 100644 --- a/setup.py +++ b/setup.py @@ -114,7 +114,7 @@ install_requires = [ "attrs >= 18.2.0", # WebSocket library for twisted and asyncio - "autobahn >= 19.5.2", + "autobahn < 22.4.1", # remove this when https://github.com/crossbario/autobahn-python/issues/1566 is fixed # Support for Python 3 transition "future >= 0.18.2", From da4deab167187ec4baf02f84da8e8ae7e03a6a8a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 16 May 2022 11:19:46 -0400 Subject: [PATCH 025/289] Note version with fix. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2b4fd6988..d07031cd9 100644 --- a/setup.py +++ b/setup.py @@ -114,7 +114,7 @@ install_requires = [ "attrs >= 18.2.0", # WebSocket library for twisted and asyncio - "autobahn < 22.4.1", # remove this when https://github.com/crossbario/autobahn-python/issues/1566 is fixed + "autobahn < 22.4.1", # remove this when 22.4.3 is released # Support for Python 3 transition "future >= 0.18.2", From d209065a6e680eb3e42e4e5bad89655d4e3d7ec0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 16 May 2022 11:22:44 -0400 Subject: [PATCH 026/289] Fix type issue, and modernize slightly. --- src/allmydata/storage_client.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 0f66e8e4a..c63bfccff 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -5,10 +5,6 @@ the foolscap-based server implemented in src/allmydata/storage/*.py . Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals # roadmap: # @@ -34,14 +30,10 @@ from __future__ import unicode_literals # # 6: implement other sorts of IStorageClient classes: S3, etc -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from six import ensure_text - +from typing import Union import re, time, hashlib from os import urandom -# On Python 2 this will be the backport. from configparser import NoSectionError import attr @@ -1193,7 +1185,7 @@ class _HTTPStorageServer(object): reason: bytes ): if share_type == b"immutable": - client = StorageClientImmutables(self._http_client) + client : Union[StorageClientImmutables, StorageClientMutables] = StorageClientImmutables(self._http_client) elif share_type == b"mutable": client = StorageClientMutables(self._http_client) else: From 32a11662a277b976af08194a42358e727fdf8ee8 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 12:55:55 -0400 Subject: [PATCH 027/289] Install a specific version. --- integration/install-tor.sh | 2 +- integration/test_tor.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/integration/install-tor.sh b/integration/install-tor.sh index 66fa64cb1..97a7f9465 100755 --- a/integration/install-tor.sh +++ b/integration/install-tor.sh @@ -791,4 +791,4 @@ keSPmmDrjl8cySCNsMo= EOF ${SUDO} apt-get --quiet update -${SUDO} apt-get --quiet --yes install tor deb.torproject.org-keyring +${SUDO} apt-get --quiet --yes install tor=0.4.4.5-1 deb.torproject.org-keyring diff --git a/integration/test_tor.py b/integration/test_tor.py index c78fa8098..e1b25e161 100644 --- a/integration/test_tor.py +++ b/integration/test_tor.py @@ -40,6 +40,7 @@ if PY2: @pytest_twisted.inlineCallbacks def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl): + import time; time.sleep(3) carol = yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) dave = yield _create_anonymous_node(reactor, 'dave', 8009, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) util.await_client_ready(carol, minimum_number_of_servers=2) From 04198cdb73f8db0a3e9d3aeab5554e9ce15e2750 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 12:56:22 -0400 Subject: [PATCH 028/289] News file. --- newsfragments/3898.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3898.minor diff --git a/newsfragments/3898.minor b/newsfragments/3898.minor new file mode 100644 index 000000000..e69de29bb From d6abefb041b58df8b019a11abda47c8dc1d6efd2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 12:57:29 -0400 Subject: [PATCH 029/289] Temporary always build images. --- .circleci/config.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 79ce57ed0..c285263f3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -68,14 +68,14 @@ workflows: images: # Build the Docker images used by the ci jobs. This makes the ci jobs # faster and takes various spurious failures out of the critical path. - triggers: - # Build once a day - - schedule: - cron: "0 0 * * *" - filters: - branches: - only: - - "master" + # triggers: + # # Build once a day + # - schedule: + # cron: "0 0 * * *" + # filters: + # branches: + # only: + # - "master" jobs: # Every job that pushes a Docker image from Docker Hub needs to provide From 5ef8fa5b8958d31e4b79091e30b5df8c6b3d7487 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 12:57:50 -0400 Subject: [PATCH 030/289] TEmporary only build the image we care about. --- .circleci/config.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c285263f3..7bccb01ec 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -88,16 +88,16 @@ workflows: # https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts - "build-image-debian-10": &DOCKERHUB_CONTEXT context: "dockerhub-auth" - - "build-image-debian-11": - <<: *DOCKERHUB_CONTEXT - - "build-image-ubuntu-18-04": - <<: *DOCKERHUB_CONTEXT - - "build-image-ubuntu-20-04": - <<: *DOCKERHUB_CONTEXT - - "build-image-fedora-35": - <<: *DOCKERHUB_CONTEXT - - "build-image-oraclelinux-8": - <<: *DOCKERHUB_CONTEXT + # - "build-image-debian-11": + # <<: *DOCKERHUB_CONTEXT + # - "build-image-ubuntu-18-04": + # <<: *DOCKERHUB_CONTEXT + # - "build-image-ubuntu-20-04": + # <<: *DOCKERHUB_CONTEXT + # - "build-image-fedora-35": + # <<: *DOCKERHUB_CONTEXT + # - "build-image-oraclelinux-8": + # <<: *DOCKERHUB_CONTEXT # Restore later as PyPy38 #- "build-image-pypy27-buster": # <<: *DOCKERHUB_CONTEXT From 33c43cb2b3da13916b2926e2c6f6692a413058f8 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 13:01:57 -0400 Subject: [PATCH 031/289] Try a different variant. --- integration/install-tor.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/install-tor.sh b/integration/install-tor.sh index 97a7f9465..e4ec45e78 100755 --- a/integration/install-tor.sh +++ b/integration/install-tor.sh @@ -791,4 +791,4 @@ keSPmmDrjl8cySCNsMo= EOF ${SUDO} apt-get --quiet update -${SUDO} apt-get --quiet --yes install tor=0.4.4.5-1 deb.torproject.org-keyring +${SUDO} apt-get --quiet --yes install tor=0.4.4.5 deb.torproject.org-keyring From 9bef8f4abdb4a2f61b6e57f5b8476ce53835b708 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 13:07:40 -0400 Subject: [PATCH 032/289] This appears to be the alternative to latest version :( --- integration/install-tor.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/install-tor.sh b/integration/install-tor.sh index e4ec45e78..9a8fc500e 100755 --- a/integration/install-tor.sh +++ b/integration/install-tor.sh @@ -791,4 +791,4 @@ keSPmmDrjl8cySCNsMo= EOF ${SUDO} apt-get --quiet update -${SUDO} apt-get --quiet --yes install tor=0.4.4.5 deb.torproject.org-keyring +${SUDO} apt-get --quiet --yes install tor=0.3.5.16-1 deb.torproject.org-keyring From 012693f6b2ae7e28daf543971d9c2341b891620f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 13:19:13 -0400 Subject: [PATCH 033/289] Build a different image for now. --- .circleci/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7bccb01ec..0120c6b15 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -86,10 +86,10 @@ workflows: # Contexts are managed in the CircleCI web interface: # # https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts - - "build-image-debian-10": &DOCKERHUB_CONTEXT - context: "dockerhub-auth" - # - "build-image-debian-11": - # <<: *DOCKERHUB_CONTEXT + # - "build-image-debian-10": &DOCKERHUB_CONTEXT + # context: "dockerhub-auth" + - "build-image-debian-11": + <<: *DOCKERHUB_CONTEXT # - "build-image-ubuntu-18-04": # <<: *DOCKERHUB_CONTEXT # - "build-image-ubuntu-20-04": From 28e10d127aaac62be063258b6d46c7e5451a761a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 13:20:37 -0400 Subject: [PATCH 034/289] Do integration tests with more modern image. --- .circleci/config.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0120c6b15..7a21a7941 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,8 +18,7 @@ workflows: - "debian-10": {} - "debian-11": - requires: - - "debian-10" + {} - "ubuntu-20-04": {} @@ -58,7 +57,7 @@ workflows: requires: # If the unit test suite doesn't pass, don't bother running the # integration tests. - - "debian-10" + - "debian-11" - "typechecks": {} @@ -297,6 +296,10 @@ jobs: integration: <<: *DEBIAN + docker: + - <<: *DOCKERHUB_AUTH + image: "tahoelafsci/debian:11-py3.9" + user: "nobody" environment: <<: *UTF_8_ENVIRONMENT From 90a6cf18ac2960a73b0fd455353e828f1b4ebd54 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 13:20:44 -0400 Subject: [PATCH 035/289] Just use system Tor, for more stability. --- .circleci/Dockerfile.debian | 8 +- integration/install-tor.sh | 794 ------------------------------------ 2 files changed, 2 insertions(+), 800 deletions(-) delete mode 100755 integration/install-tor.sh diff --git a/.circleci/Dockerfile.debian b/.circleci/Dockerfile.debian index f12f19551..abab1f4fa 100644 --- a/.circleci/Dockerfile.debian +++ b/.circleci/Dockerfile.debian @@ -18,15 +18,11 @@ RUN apt-get --quiet update && \ libffi-dev \ libssl-dev \ libyaml-dev \ - virtualenv + virtualenv \ + tor # Get the project source. This is better than it seems. CircleCI will # *update* this checkout on each job run, saving us more time per-job. COPY . ${BUILD_SRC_ROOT} RUN "${BUILD_SRC_ROOT}"/.circleci/prepare-image.sh "${WHEELHOUSE_PATH}" "${VIRTUALENV_PATH}" "${BUILD_SRC_ROOT}" "python${PYTHON_VERSION}" - -# Only the integration tests currently need this but it doesn't hurt to always -# have it present and it's simpler than building a whole extra image just for -# the integration tests. -RUN ${BUILD_SRC_ROOT}/integration/install-tor.sh diff --git a/integration/install-tor.sh b/integration/install-tor.sh deleted file mode 100755 index 9a8fc500e..000000000 --- a/integration/install-tor.sh +++ /dev/null @@ -1,794 +0,0 @@ -#!/bin/bash - -# https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ -set -euxo pipefail - -CODENAME=$(lsb_release --short --codename) - -if [ "$(id -u)" != "0" ]; then - SUDO="sudo" -else - SUDO="" -fi - -# Script to install Tor -echo "deb http://deb.torproject.org/torproject.org ${CODENAME} main" | ${SUDO} tee -a /etc/apt/sources.list -echo "deb-src http://deb.torproject.org/torproject.org ${CODENAME} main" | ${SUDO} tee -a /etc/apt/sources.list - -# # Install Tor repo signing key -${SUDO} apt-key add - < Date: Wed, 18 May 2022 13:26:07 -0400 Subject: [PATCH 036/289] Make it work temporarily. --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7a21a7941..8a231ea9d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -85,8 +85,8 @@ workflows: # Contexts are managed in the CircleCI web interface: # # https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts - # - "build-image-debian-10": &DOCKERHUB_CONTEXT - # context: "dockerhub-auth" + - "build-image-debian-10": &DOCKERHUB_CONTEXT + context: "dockerhub-auth" - "build-image-debian-11": <<: *DOCKERHUB_CONTEXT # - "build-image-ubuntu-18-04": From 63e16166d7e0bb6c0bd802791a841880856c6609 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 13:43:26 -0400 Subject: [PATCH 037/289] Restore default image building setup. --- .circleci/config.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8a231ea9d..051e690b7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -67,14 +67,14 @@ workflows: images: # Build the Docker images used by the ci jobs. This makes the ci jobs # faster and takes various spurious failures out of the critical path. - # triggers: - # # Build once a day - # - schedule: - # cron: "0 0 * * *" - # filters: - # branches: - # only: - # - "master" + triggers: + # Build once a day + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - "master" jobs: # Every job that pushes a Docker image from Docker Hub needs to provide @@ -89,14 +89,14 @@ workflows: context: "dockerhub-auth" - "build-image-debian-11": <<: *DOCKERHUB_CONTEXT - # - "build-image-ubuntu-18-04": - # <<: *DOCKERHUB_CONTEXT - # - "build-image-ubuntu-20-04": - # <<: *DOCKERHUB_CONTEXT - # - "build-image-fedora-35": - # <<: *DOCKERHUB_CONTEXT - # - "build-image-oraclelinux-8": - # <<: *DOCKERHUB_CONTEXT + - "build-image-ubuntu-18-04": + <<: *DOCKERHUB_CONTEXT + - "build-image-ubuntu-20-04": + <<: *DOCKERHUB_CONTEXT + - "build-image-fedora-35": + <<: *DOCKERHUB_CONTEXT + - "build-image-oraclelinux-8": + <<: *DOCKERHUB_CONTEXT # Restore later as PyPy38 #- "build-image-pypy27-buster": # <<: *DOCKERHUB_CONTEXT From 02bbce81115eb0f4778f1a61f5a39f19d8f266c3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 18 May 2022 13:44:18 -0400 Subject: [PATCH 038/289] Get rid of spurious sleep. --- integration/test_tor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integration/test_tor.py b/integration/test_tor.py index e1b25e161..c78fa8098 100644 --- a/integration/test_tor.py +++ b/integration/test_tor.py @@ -40,7 +40,6 @@ if PY2: @pytest_twisted.inlineCallbacks def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl): - import time; time.sleep(3) carol = yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) dave = yield _create_anonymous_node(reactor, 'dave', 8009, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) util.await_client_ready(carol, minimum_number_of_servers=2) From 8c8ea4927f4c015f391913b29a47300f69cdefcd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 May 2022 11:07:55 -0400 Subject: [PATCH 039/289] Switch to public API. --- src/allmydata/storage/http_server.py | 8 +++++--- src/allmydata/storage/server.py | 25 +++++++++++++++---------- src/allmydata/test/test_repairer.py | 2 +- src/allmydata/test/test_storage.py | 4 ++-- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 709c1fda5..033f9ec4c 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -538,8 +538,8 @@ class HTTPServer(object): methods=["PUT"], ) def add_or_renew_lease(self, request, authorization, storage_index): - """Update the lease for an immutable share.""" - if not list(self._storage_server._get_bucket_shares(storage_index)): + """Update the lease for an immutable or mutable share.""" + if not list(self._storage_server.get_shares(storage_index)): raise _HTTPError(http.NOT_FOUND) # Checking of the renewal secret is done by the backend. @@ -674,7 +674,9 @@ class HTTPServer(object): ): """Indicate that given share is corrupt, with a text reason.""" # TODO unit test all the paths - if not self._storage_server._share_exists(storage_index, share_number): + if share_number not in { + shnum for (shnum, _) in self._storage_server.get_shares(storage_index) + }: raise _HTTPError(http.NOT_FOUND) info = self._read_encoded(request, _SCHEMAS["advise_corrupt_share"]) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index bcf44dc30..07b82b4d8 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -3,7 +3,7 @@ Ported to Python 3. """ from __future__ import annotations from future.utils import bytes_to_native_str -from typing import Dict, Tuple +from typing import Dict, Tuple, Iterable import os, re @@ -321,7 +321,7 @@ class StorageServer(service.MultiService): # they asked about: this will save them a lot of work. Add or update # leases for all of them: if they want us to hold shares for this # file, they'll want us to hold leases for this file. - for (shnum, fn) in self._get_bucket_shares(storage_index): + for (shnum, fn) in self.get_shares(storage_index): alreadygot[shnum] = ShareFile(fn) if renew_leases: self._add_or_renew_leases(alreadygot.values(), lease_info) @@ -363,7 +363,7 @@ class StorageServer(service.MultiService): return set(alreadygot), bucketwriters def _iter_share_files(self, storage_index): - for shnum, filename in self._get_bucket_shares(storage_index): + for shnum, filename in self.get_shares(storage_index): with open(filename, 'rb') as f: header = f.read(32) if MutableShareFile.is_valid_header(header): @@ -416,10 +416,12 @@ class StorageServer(service.MultiService): """ self._call_on_bucket_writer_close.append(handler) - def _get_bucket_shares(self, storage_index): - """Return a list of (shnum, pathname) tuples for files that hold + def get_shares(self, storage_index) -> Iterable[(int, str)]: + """ + Return an iterable of (shnum, pathname) tuples for files that hold shares for this storage_index. In each tuple, 'shnum' will always be - the integer form of the last component of 'pathname'.""" + the integer form of the last component of 'pathname'. + """ storagedir = os.path.join(self.sharedir, storage_index_to_dir(storage_index)) try: for f in os.listdir(storagedir): @@ -431,12 +433,15 @@ class StorageServer(service.MultiService): pass def get_buckets(self, storage_index): + """ + Get ``BucketReaders`` for an immutable. + """ start = self._clock.seconds() self.count("get") si_s = si_b2a(storage_index) log.msg("storage: get_buckets %r" % si_s) bucketreaders = {} # k: sharenum, v: BucketReader - for shnum, filename in self._get_bucket_shares(storage_index): + for shnum, filename in self.get_shares(storage_index): bucketreaders[shnum] = BucketReader(self, filename, storage_index, shnum) self.add_latency("get", self._clock.seconds() - start) @@ -453,7 +458,7 @@ class StorageServer(service.MultiService): # since all shares get the same lease data, we just grab the leases # from the first share try: - shnum, filename = next(self._get_bucket_shares(storage_index)) + shnum, filename = next(self.get_shares(storage_index)) sf = ShareFile(filename) return sf.get_leases() except StopIteration: @@ -467,7 +472,7 @@ class StorageServer(service.MultiService): :return: An iterable of the leases attached to this slot. """ - for _, share_filename in self._get_bucket_shares(storage_index): + for _, share_filename in self.get_shares(storage_index): share = MutableShareFile(share_filename) return share.get_leases() return [] @@ -742,7 +747,7 @@ class StorageServer(service.MultiService): :return bool: ``True`` if a share with the given number exists at the given storage index, ``False`` otherwise. """ - for existing_sharenum, ignored in self._get_bucket_shares(storage_index): + for existing_sharenum, ignored in self.get_shares(storage_index): if existing_sharenum == shnum: return True return False diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index 88696000c..f9b93af72 100644 --- a/src/allmydata/test/test_repairer.py +++ b/src/allmydata/test/test_repairer.py @@ -717,7 +717,7 @@ class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, ss = self.g.servers_by_number[0] # we want to delete the share corresponding to the server # we're making not-respond - share = next(ss._get_bucket_shares(self.c0_filenode.get_storage_index()))[0] + share = next(ss.get_shares(self.c0_filenode.get_storage_index()))[0] self.delete_shares_numbered(self.uri, [share]) return self.c0_filenode.check_and_repair(Monitor()) d.addCallback(_then) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 65d09de25..91d55790e 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -766,7 +766,7 @@ class Server(unittest.TestCase): writer.close() # It should have a lease granted at the current time. - shares = dict(ss._get_bucket_shares(storage_index)) + shares = dict(ss.get_shares(storage_index)) self.assertEqual( [first_lease], list( @@ -789,7 +789,7 @@ class Server(unittest.TestCase): writer.close() # The first share's lease expiration time is unchanged. - shares = dict(ss._get_bucket_shares(storage_index)) + shares = dict(ss.get_shares(storage_index)) self.assertEqual( [first_lease], list( From 12927d50bafdb37d7dd0a53443b5d61ee6364be7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 May 2022 11:09:04 -0400 Subject: [PATCH 040/289] Type annotation improvements. --- src/allmydata/storage/http_client.py | 2 +- src/allmydata/storage/server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 167d2394a..de9dfc518 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -358,7 +358,7 @@ class StorageClientGeneral(object): @inlineCallbacks def add_or_renew_lease( self, storage_index: bytes, renew_secret: bytes, cancel_secret: bytes - ): + ) -> Deferred[None]: """ Add or renew a lease. diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 07b82b4d8..0a1999dfb 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -416,7 +416,7 @@ class StorageServer(service.MultiService): """ self._call_on_bucket_writer_close.append(handler) - def get_shares(self, storage_index) -> Iterable[(int, str)]: + def get_shares(self, storage_index) -> Iterable[tuple[int, str]]: """ Return an iterable of (shnum, pathname) tuples for files that hold shares for this storage_index. In each tuple, 'shnum' will always be From 63624eedec9d50dd93c2812482fb6fa977e1e096 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 May 2022 11:33:02 -0400 Subject: [PATCH 041/289] Reduce code duplication. --- src/allmydata/storage/http_server.py | 53 +++++++++++++--------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 033f9ec4c..a4f67bb5e 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -2,6 +2,7 @@ HTTP server for storage. """ +from __future__ import annotations from typing import Dict, List, Set, Tuple, Any from functools import wraps @@ -273,6 +274,28 @@ _SCHEMAS = { } +# TODO unit tests? or rely on higher-level tests +def parse_range(request) -> tuple[int, int]: + """ + Parse the subset of ``Range`` headers we support: bytes only, only a single + range, the end must be explicitly specified. Raises a + ``_HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE)`` if parsing is not + possible or the header isn't set. + + Returns tuple of (start_offset, end_offset). + """ + range_header = parse_range_header(request.getHeader("range")) + if ( + range_header is None + or range_header.units != "bytes" + or len(range_header.ranges) > 1 # more than one range + or range_header.ranges[0][1] is None # range without end + ): + raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE) + + return range_header.ranges[0] + + class HTTPServer(object): """ A HTTP interface to the storage server. @@ -505,17 +528,7 @@ class HTTPServer(object): request.write(data) start += len(data) - range_header = parse_range_header(request.getHeader("range")) - if ( - range_header is None - or range_header.units != "bytes" - or len(range_header.ranges) > 1 # more than one range - or range_header.ranges[0][1] is None # range without end - ): - request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE) - return b"" - - offset, end = range_header.ranges[0] + offset, end = parse_range(request) # TODO limit memory usage # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 @@ -617,23 +630,7 @@ class HTTPServer(object): ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): """Read a chunk from a mutable.""" - if request.getHeader("range") is None: - # TODO in follow-up ticket - raise NotImplementedError() - - # TODO reduce duplication with immutable reads? - # TODO unit tests, perhaps shared if possible - range_header = parse_range_header(request.getHeader("range")) - if ( - range_header is None - or range_header.units != "bytes" - or len(range_header.ranges) > 1 # more than one range - or range_header.ranges[0][1] is None # range without end - ): - request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE) - return b"" - - offset, end = range_header.ranges[0] + offset, end = parse_range(request) # TODO limit memory usage # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 From 2313195c2b94db799c93083580b643c85fabef47 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 May 2022 11:43:42 -0400 Subject: [PATCH 042/289] Reduce duplication. --- src/allmydata/storage/http_server.py | 75 +++++++++++++--------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index a4f67bb5e..2ff0c6908 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -3,7 +3,7 @@ HTTP server for storage. """ from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any +from typing import Dict, List, Set, Tuple, Any, Callable from functools import wraps from base64 import b64decode @@ -275,14 +275,20 @@ _SCHEMAS = { # TODO unit tests? or rely on higher-level tests -def parse_range(request) -> tuple[int, int]: +def read_range(request, read_data: Callable[int, int, bytes]) -> bytes: """ - Parse the subset of ``Range`` headers we support: bytes only, only a single - range, the end must be explicitly specified. Raises a - ``_HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE)`` if parsing is not - possible or the header isn't set. + Parse the ``Range`` header, read appropriately, return as result. - Returns tuple of (start_offset, end_offset). + Only parses a subset of ``Range`` headers that we support: must be set, + bytes only, only a single range, the end must be explicitly specified. + Raises a ``_HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE)`` if parsing is + not possible or the header isn't set. + + Returns the bytes to return from the request handler, and sets appropriate + response headers. + + Takes a function that will do the actual reading given the start offset and + a length to read. """ range_header = parse_range_header(request.getHeader("range")) if ( @@ -293,7 +299,21 @@ def parse_range(request) -> tuple[int, int]: ): raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE) - return range_header.ranges[0] + offset, end = range_header.ranges[0] + + # TODO limit memory usage + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 + data = read_data(offset, end - offset) + + request.setResponseCode(http.PARTIAL_CONTENT) + if len(data): + # For empty bodies the content-range header makes no sense since + # the end of the range is inclusive. + request.setHeader( + "content-range", + ContentRange("bytes", offset, offset + len(data)).to_header(), + ) + return data class HTTPServer(object): @@ -528,21 +548,7 @@ class HTTPServer(object): request.write(data) start += len(data) - offset, end = parse_range(request) - - # TODO limit memory usage - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - data = bucket.read(offset, end - offset) - - request.setResponseCode(http.PARTIAL_CONTENT) - if len(data): - # For empty bodies the content-range header makes no sense since - # the end of the range is inclusive. - request.setHeader( - "content-range", - ContentRange("bytes", offset, offset + len(data)).to_header(), - ) - return data + return read_range(request, bucket.read) @_authorized_route( _app, @@ -630,24 +636,15 @@ class HTTPServer(object): ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): """Read a chunk from a mutable.""" - offset, end = parse_range(request) + if request.getHeader("range") is None: + raise NotImplementedError() # should be able to move shared implementation into read_range()... - # TODO limit memory usage - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - data = self._storage_server.slot_readv( - storage_index, [share_number], [(offset, end - offset)] - )[share_number][0] + def read_data(offset, length): + return self._storage_server.slot_readv( + storage_index, [share_number], [(offset, length)] + )[share_number][0] - # TODO reduce duplication? - request.setResponseCode(http.PARTIAL_CONTENT) - if len(data): - # For empty bodies the content-range header makes no sense since - # the end of the range is inclusive. - request.setHeader( - "content-range", - ContentRange("bytes", offset, offset + len(data)).to_header(), - ) - return data + return read_range(request, read_data) @_authorized_route( _app, From fd306b9a61b2b4806c948ca0f2b2e7fe1f447110 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 1 Jun 2022 13:54:54 -0400 Subject: [PATCH 043/289] Share more code across mutable and immutable reads. --- src/allmydata/storage/http_server.py | 38 +++++++++++++--------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 2ff0c6908..b031cbb15 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -275,7 +275,7 @@ _SCHEMAS = { # TODO unit tests? or rely on higher-level tests -def read_range(request, read_data: Callable[int, int, bytes]) -> bytes: +def read_range(request, read_data: Callable[int, int, bytes]) -> Optional[bytes]: """ Parse the ``Range`` header, read appropriately, return as result. @@ -284,15 +284,28 @@ def read_range(request, read_data: Callable[int, int, bytes]) -> bytes: Raises a ``_HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE)`` if parsing is not possible or the header isn't set. - Returns the bytes to return from the request handler, and sets appropriate - response headers. + Returns a result that should be returned from the request handler, and sets + appropriate response headers. Takes a function that will do the actual reading given the start offset and a length to read. """ + if request.getHeader("range") is None: + # Return the whole thing. + start = 0 + while True: + # TODO should probably yield to event loop occasionally... + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 + data = read_data(start, start + 65536) + if not data: + request.finish() + return + request.write(data) + start += len(data) + range_header = parse_range_header(request.getHeader("range")) if ( - range_header is None + range_header is None # failed to parse or range_header.units != "bytes" or len(range_header.ranges) > 1 # more than one range or range_header.ranges[0][1] is None # range without end @@ -535,19 +548,6 @@ class HTTPServer(object): request.setResponseCode(http.NOT_FOUND) return b"" - if request.getHeader("range") is None: - # Return the whole thing. - start = 0 - while True: - # TODO should probably yield to event loop occasionally... - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - data = bucket.read(start, start + 65536) - if not data: - request.finish() - return - request.write(data) - start += len(data) - return read_range(request, bucket.read) @_authorized_route( @@ -636,9 +636,7 @@ class HTTPServer(object): ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): """Read a chunk from a mutable.""" - if request.getHeader("range") is None: - raise NotImplementedError() # should be able to move shared implementation into read_range()... - + # TODO unit tests def read_data(offset, length): return self._storage_server.slot_readv( storage_index, [share_number], [(offset, length)] From f1384096fa26c97e5c3f88a9d8fb0212c7f34c16 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 3 Jun 2022 13:46:23 -0400 Subject: [PATCH 044/289] First unit test for mutables. --- src/allmydata/storage/http_client.py | 6 +- src/allmydata/storage/http_server.py | 3 +- src/allmydata/test/test_storage_http.py | 83 ++++++++++++++++++++++++- 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index de9dfc518..bf6104dea 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -7,7 +7,7 @@ from __future__ import annotations from typing import Union, Optional, Sequence, Mapping from base64 import b64encode -from attrs import define, asdict, frozen +from attrs import define, asdict, frozen, field # TODO Make sure to import Python version? from cbor2 import loads, dumps @@ -646,8 +646,8 @@ class ReadVector: class TestWriteVectors: """Test and write vectors for a specific share.""" - test_vectors: Sequence[TestVector] - write_vectors: Sequence[WriteVector] + test_vectors: Sequence[TestVector] = field(factory=list) + write_vectors: Sequence[WriteVector] = field(factory=list) new_length: Optional[int] = None def asdict(self) -> dict: diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index b031cbb15..9735a0626 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -263,7 +263,7 @@ _SCHEMAS = { * share_number: { "test": [* {"offset": uint, "size": uint, "specimen": bstr}] "write": [* {"offset": uint, "data": bstr}] - "new-length": uint // null + "new-length": uint / null } } "read-vector": [* {"offset": uint, "size": uint}] @@ -274,7 +274,6 @@ _SCHEMAS = { } -# TODO unit tests? or rely on higher-level tests def read_range(request, read_data: Callable[int, int, bytes]) -> Optional[bytes]: """ Parse the ``Range`` header, read appropriately, return as result. diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index fcc2401f2..37e3be8a7 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -40,6 +40,10 @@ from ..storage.http_client import ( UploadProgress, StorageClientGeneral, _encode_si, + StorageClientMutables, + TestWriteVectors, + WriteVector, + ReadVector, ) @@ -1109,7 +1113,9 @@ class ImmutableHTTPAPITests(SyncTestCase): storage_index = urandom(16) secret = b"A" * 32 with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of(self.general_client.add_or_renew_lease(storage_index, secret, secret)) + result_of( + self.general_client.add_or_renew_lease(storage_index, secret, secret) + ) def test_advise_corrupt_share(self): """ @@ -1142,3 +1148,78 @@ class ImmutableHTTPAPITests(SyncTestCase): result_of( self.imm_client.advise_corrupt_share(si, share_number, reason) ) + + +class MutableHTTPAPIsTests(SyncTestCase): + """Tests for mutable APIs.""" + + def setUp(self): + super(MutableHTTPAPIsTests, self).setUp() + self.http = self.useFixture(HttpTestFixture()) + self.mut_client = StorageClientMutables(self.http.client) + + def create_upload(self, data=b"abcdef"): + """ + Utility that creates shares 0 and 1 with bodies + ``{data}-{share_number}``. + """ + write_secret = urandom(32) + lease_secret = urandom(32) + storage_index = urandom(16) + result_of( + self.mut_client.read_test_write_chunks( + storage_index, + write_secret, + lease_secret, + lease_secret, + { + 0: TestWriteVectors( + write_vectors=[WriteVector(offset=0, data=data + b"-0")] + ), + 1: TestWriteVectors( + write_vectors=[ + WriteVector(offset=0, data=data), + WriteVector(offset=len(data), data=b"-1"), + ] + ), + }, + [ReadVector(0, len(data) + 2)], + ) + ) + return storage_index, write_secret, lease_secret + + def test_upload_can_be_downloaded(self): + """ + Written data can be read, both by the combo operation and a direct + read. + """ + storage_index, _, _ = self.create_upload() + data0 = result_of(self.mut_client.read_share_chunk(storage_index, 0, 1, 7)) + data1 = result_of(self.mut_client.read_share_chunk(storage_index, 1, 0, 8)) + self.assertEqual((data0, data1), (b"bcdef-0", b"abcdef-1")) + + def test_read_before_write(self): + """In combo read/test/write operation, reads happen before writes.""" + + def test_conditional_upload(self): + pass + + def test_list_shares(self): + pass + + def test_wrong_write_enabler(self): + pass + + # TODO refactor reads tests so they're shared + + def test_lease_renew_and_add(self): + pass + + def test_lease_on_unknown_storage_index(self): + pass + + def test_advise_corrupt_share(self): + pass + + def test_advise_corrupt_share_unknown(self): + pass From 3e67d2d7890ceb7f537fdfeda89072e1d99c1835 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 09:50:36 -0400 Subject: [PATCH 045/289] More tests. --- src/allmydata/test/test_storage_http.py | 78 +++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 37e3be8a7..6cf2f883b 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -44,6 +44,8 @@ from ..storage.http_client import ( TestWriteVectors, WriteVector, ReadVector, + ReadTestWriteResult, + TestVector, ) @@ -1183,15 +1185,14 @@ class MutableHTTPAPIsTests(SyncTestCase): ] ), }, - [ReadVector(0, len(data) + 2)], + [], ) ) return storage_index, write_secret, lease_secret - def test_upload_can_be_downloaded(self): + def test_write_can_be_read(self): """ - Written data can be read, both by the combo operation and a direct - read. + Written data can be read using ``read_share_chunk``. """ storage_index, _, _ = self.create_upload() data0 = result_of(self.mut_client.read_share_chunk(storage_index, 0, 1, 7)) @@ -1200,9 +1201,74 @@ class MutableHTTPAPIsTests(SyncTestCase): def test_read_before_write(self): """In combo read/test/write operation, reads happen before writes.""" + storage_index, write_secret, lease_secret = self.create_upload() + result = result_of( + self.mut_client.read_test_write_chunks( + storage_index, + write_secret, + lease_secret, + lease_secret, + { + 0: TestWriteVectors( + write_vectors=[WriteVector(offset=1, data=b"XYZ")] + ), + }, + [ReadVector(0, 8)], + ) + ) + # Reads are from before the write: + self.assertEqual( + result, + ReadTestWriteResult( + success=True, reads={0: [b"abcdef-0"], 1: [b"abcdef-1"]} + ), + ) + # But the write did happen: + data0 = result_of(self.mut_client.read_share_chunk(storage_index, 0, 0, 8)) + data1 = result_of(self.mut_client.read_share_chunk(storage_index, 1, 0, 8)) + self.assertEqual((data0, data1), (b"aXYZef-0", b"abcdef-1")) - def test_conditional_upload(self): - pass + def test_conditional_write(self): + """Uploads only happen if the test passes.""" + storage_index, write_secret, lease_secret = self.create_upload() + result_failed = result_of( + self.mut_client.read_test_write_chunks( + storage_index, + write_secret, + lease_secret, + lease_secret, + { + 0: TestWriteVectors( + test_vectors=[TestVector(1, 4, b"FAIL")], + write_vectors=[WriteVector(offset=1, data=b"XYZ")], + ), + }, + [], + ) + ) + self.assertFalse(result_failed.success) + + # This time the test matches: + result = result_of( + self.mut_client.read_test_write_chunks( + storage_index, + write_secret, + lease_secret, + lease_secret, + { + 0: TestWriteVectors( + test_vectors=[TestVector(1, 4, b"bcde")], + write_vectors=[WriteVector(offset=1, data=b"XYZ")], + ), + }, + [ReadVector(0, 8)], + ) + ) + self.assertTrue(result.success) + self.assertEqual( + result_of(self.mut_client.read_share_chunk(storage_index, 0, 0, 8)), + b"aXYZef-0", + ) def test_list_shares(self): pass From 797f34aec32eefbe495d9936d955e7ab4bdc3f1a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 09:59:12 -0400 Subject: [PATCH 046/289] More tests. --- src/allmydata/storage/http_client.py | 3 --- src/allmydata/test/test_storage_http.py | 35 +++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index bf6104dea..9711e748d 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -696,7 +696,6 @@ class StorageClientMutables: Given a mapping between share numbers and test/write vectors, the tests are done and if they are valid the writes are done. """ - # TODO unit test all the things url = self._client.relative_url( "/v1/mutable/{}/read-test-write".format(_encode_si(storage_index)) ) @@ -731,7 +730,6 @@ class StorageClientMutables: """ Download a chunk of data from a share. """ - # TODO unit test all the things return read_share_chunk( self._client, "mutable", storage_index, share_number, offset, length ) @@ -741,7 +739,6 @@ class StorageClientMutables: """ List the share numbers for a given storage index. """ - # TODO unit test all the things url = self._client.relative_url( "/v1/mutable/{}/shares".format(_encode_si(storage_index)) ) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 6cf2f883b..65aa12e40 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -1271,10 +1271,41 @@ class MutableHTTPAPIsTests(SyncTestCase): ) def test_list_shares(self): - pass + """``list_shares()`` returns the shares for a given storage index.""" + storage_index, _, _ = self.create_upload() + self.assertEqual(result_of(self.mut_client.list_shares(storage_index)), {0, 1}) + + def test_non_existent_list_shares(self): + """A non-existent storage index errors when shares are listed.""" + with self.assertRaises(ClientException) as exc: + result_of(self.mut_client.list_shares(urandom(32))) + self.assertEqual(exc.exception.code, http.NOT_FOUND) def test_wrong_write_enabler(self): - pass + """Writes with the wrong write enabler fail, and are not processed.""" + storage_index, write_secret, lease_secret = self.create_upload() + with self.assertRaises(ClientException) as exc: + result_of( + self.mut_client.read_test_write_chunks( + storage_index, + urandom(32), + lease_secret, + lease_secret, + { + 0: TestWriteVectors( + write_vectors=[WriteVector(offset=1, data=b"XYZ")] + ), + }, + [ReadVector(0, 8)], + ) + ) + self.assertEqual(exc.exception.code, http.UNAUTHORIZED) + + # The write did not happen: + self.assertEqual( + result_of(self.mut_client.read_share_chunk(storage_index, 0, 0, 8)), + b"abcdef-0", + ) # TODO refactor reads tests so they're shared From e6efb62fd19eef08f14916438240f70bc197a4c3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 10:25:06 -0400 Subject: [PATCH 047/289] Refactor immutable tests so they can shared with mutables. --- src/allmydata/test/test_storage_http.py | 460 +++++++++++++----------- 1 file changed, 246 insertions(+), 214 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 65aa12e40..fc79fbe34 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -5,7 +5,7 @@ Tests for HTTP storage client + server. from base64 import b64encode from contextlib import contextmanager from os import urandom - +from typing import Union, Callable, Tuple from cbor2 import dumps from pycddl import ValidationError as CDDLValidationError from hypothesis import assume, given, strategies as st @@ -787,141 +787,6 @@ class ImmutableHTTPAPITests(SyncTestCase): ) ) - def upload(self, share_number, data_length=26): - """ - Create a share, return (storage_index, uploaded_data). - """ - uploaded_data = (b"abcdefghijklmnopqrstuvwxyz" * ((data_length // 26) + 1))[ - :data_length - ] - (upload_secret, _, storage_index, _) = self.create_upload( - {share_number}, data_length - ) - result_of( - self.imm_client.write_share_chunk( - storage_index, - share_number, - upload_secret, - 0, - uploaded_data, - ) - ) - return storage_index, uploaded_data - - def test_read_of_wrong_storage_index_fails(self): - """ - Reading from unknown storage index results in 404. - """ - with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of( - self.imm_client.read_share_chunk( - b"1" * 16, - 1, - 0, - 10, - ) - ) - - def test_read_of_wrong_share_number_fails(self): - """ - Reading from unknown storage index results in 404. - """ - storage_index, _ = self.upload(1) - with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of( - self.imm_client.read_share_chunk( - storage_index, - 7, # different share number - 0, - 10, - ) - ) - - def test_read_with_negative_offset_fails(self): - """ - Malformed or unsupported Range headers result in 416 (requested range - not satisfiable) error. - """ - storage_index, _ = self.upload(1) - - def check_bad_range(bad_range_value): - client = StorageClientImmutables( - StorageClientWithHeadersOverride( - self.http.client, {"range": bad_range_value} - ) - ) - - with assert_fails_with_http_code( - self, http.REQUESTED_RANGE_NOT_SATISFIABLE - ): - result_of( - client.read_share_chunk( - storage_index, - 1, - 0, - 10, - ) - ) - - # Bad unit - check_bad_range("molluscs=0-9") - # Negative offsets - check_bad_range("bytes=-2-9") - check_bad_range("bytes=0--10") - # Negative offset no endpoint - check_bad_range("bytes=-300-") - check_bad_range("bytes=") - # Multiple ranges are currently unsupported, even if they're - # semantically valid under HTTP: - check_bad_range("bytes=0-5, 6-7") - # Ranges without an end are currently unsupported, even if they're - # semantically valid under HTTP. - check_bad_range("bytes=0-") - - @given(data_length=st.integers(min_value=1, max_value=300000)) - def test_read_with_no_range(self, data_length): - """ - A read with no range returns the whole immutable. - """ - storage_index, uploaded_data = self.upload(1, data_length) - response = result_of( - self.http.client.request( - "GET", - self.http.client.relative_url( - "/v1/immutable/{}/1".format(_encode_si(storage_index)) - ), - ) - ) - self.assertEqual(response.code, http.OK) - self.assertEqual(result_of(response.content()), uploaded_data) - - def test_validate_content_range_response_to_read(self): - """ - The server responds to ranged reads with an appropriate Content-Range - header. - """ - storage_index, _ = self.upload(1, 26) - - def check_range(requested_range, expected_response): - headers = Headers() - headers.setRawHeaders("range", [requested_range]) - response = result_of( - self.http.client.request( - "GET", - self.http.client.relative_url( - "/v1/immutable/{}/1".format(_encode_si(storage_index)) - ), - headers=headers, - ) - ) - self.assertEqual( - response.headers.getRawHeaders("content-range"), [expected_response] - ) - - check_range("bytes=0-10", "bytes 0-10/*") - # Can't go beyond the end of the immutable! - check_range("bytes=10-100", "bytes 10-25/*") - def test_timed_out_upload_allows_reupload(self): """ If an in-progress upload times out, it is cancelled altogether, @@ -1062,52 +927,6 @@ class ImmutableHTTPAPITests(SyncTestCase): ), ) - def test_lease_renew_and_add(self): - """ - It's possible the renew the lease on an uploaded immutable, by using - the same renewal secret, or add a new lease by choosing a different - renewal secret. - """ - # Create immutable: - (upload_secret, lease_secret, storage_index, _) = self.create_upload({0}, 100) - result_of( - self.imm_client.write_share_chunk( - storage_index, - 0, - upload_secret, - 0, - b"A" * 100, - ) - ) - - [lease] = self.http.storage_server.get_leases(storage_index) - initial_expiration_time = lease.get_expiration_time() - - # Time passes: - self.http.clock.advance(167) - - # We renew the lease: - result_of( - self.general_client.add_or_renew_lease( - storage_index, lease_secret, lease_secret - ) - ) - - # More time passes: - self.http.clock.advance(10) - - # We create a new lease: - lease_secret2 = urandom(32) - result_of( - self.general_client.add_or_renew_lease( - storage_index, lease_secret2, lease_secret2 - ) - ) - - [lease1, lease2] = self.http.storage_server.get_leases(storage_index) - self.assertEqual(lease1.get_expiration_time(), initial_expiration_time + 167) - self.assertEqual(lease2.get_expiration_time(), initial_expiration_time + 177) - def test_lease_on_unknown_storage_index(self): """ An attempt to renew an unknown storage index will result in a HTTP 404. @@ -1119,38 +938,6 @@ class ImmutableHTTPAPITests(SyncTestCase): self.general_client.add_or_renew_lease(storage_index, secret, secret) ) - def test_advise_corrupt_share(self): - """ - Advising share was corrupted succeeds from HTTP client's perspective, - and calls appropriate method on server. - """ - corrupted = [] - self.http.storage_server.advise_corrupt_share = lambda *args: corrupted.append( - args - ) - - storage_index, _ = self.upload(13) - reason = "OHNO \u1235" - result_of(self.imm_client.advise_corrupt_share(storage_index, 13, reason)) - - self.assertEqual( - corrupted, [(b"immutable", storage_index, 13, reason.encode("utf-8"))] - ) - - def test_advise_corrupt_share_unknown(self): - """ - Advising an unknown share was corrupted results in 404. - """ - storage_index, _ = self.upload(13) - reason = "OHNO \u1235" - result_of(self.imm_client.advise_corrupt_share(storage_index, 13, reason)) - - for (si, share_number) in [(storage_index, 11), (urandom(16), 13)]: - with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of( - self.imm_client.advise_corrupt_share(si, share_number, reason) - ) - class MutableHTTPAPIsTests(SyncTestCase): """Tests for mutable APIs.""" @@ -1320,3 +1107,248 @@ class MutableHTTPAPIsTests(SyncTestCase): def test_advise_corrupt_share_unknown(self): pass + + +class SharedImmutableMutableTestsMixin: + """ + Shared tests for mutables and immutables where the API is the same. + """ + + KIND: str # either "mutable" or "immutable" + general_client: StorageClientGeneral + client: Union[StorageClientImmutables, StorageClientMutables] + clientFactory: Callable[ + StorageClient, Union[StorageClientImmutables, StorageClientMutables] + ] + + def upload(self, share_number: int, data_length=26) -> Tuple[bytes, bytes, bytes]: + """ + Create a share, return (storage_index, uploaded_data, lease secret). + """ + raise NotImplementedError + + def test_advise_corrupt_share(self): + """ + Advising share was corrupted succeeds from HTTP client's perspective, + and calls appropriate method on server. + """ + corrupted = [] + self.http.storage_server.advise_corrupt_share = lambda *args: corrupted.append( + args + ) + + storage_index, _, _ = self.upload(13) + reason = "OHNO \u1235" + result_of(self.client.advise_corrupt_share(storage_index, 13, reason)) + + self.assertEqual( + corrupted, + [(self.KIND.encode("ascii"), storage_index, 13, reason.encode("utf-8"))], + ) + + def test_advise_corrupt_share_unknown(self): + """ + Advising an unknown share was corrupted results in 404. + """ + storage_index, _, _ = self.upload(13) + reason = "OHNO \u1235" + result_of(self.client.advise_corrupt_share(storage_index, 13, reason)) + + for (si, share_number) in [(storage_index, 11), (urandom(16), 13)]: + with assert_fails_with_http_code(self, http.NOT_FOUND): + result_of(self.client.advise_corrupt_share(si, share_number, reason)) + + def test_lease_renew_and_add(self): + """ + It's possible the renew the lease on an uploaded immutable, by using + the same renewal secret, or add a new lease by choosing a different + renewal secret. + """ + # Create a storage index: + storage_index, _, lease_secret = self.upload(0) + + [lease] = self.http.storage_server.get_leases(storage_index) + initial_expiration_time = lease.get_expiration_time() + + # Time passes: + self.http.clock.advance(167) + + # We renew the lease: + result_of( + self.general_client.add_or_renew_lease( + storage_index, lease_secret, lease_secret + ) + ) + + # More time passes: + self.http.clock.advance(10) + + # We create a new lease: + lease_secret2 = urandom(32) + result_of( + self.general_client.add_or_renew_lease( + storage_index, lease_secret2, lease_secret2 + ) + ) + + [lease1, lease2] = self.http.storage_server.get_leases(storage_index) + self.assertEqual(lease1.get_expiration_time(), initial_expiration_time + 167) + self.assertEqual(lease2.get_expiration_time(), initial_expiration_time + 177) + + def test_read_of_wrong_storage_index_fails(self): + """ + Reading from unknown storage index results in 404. + """ + with assert_fails_with_http_code(self, http.NOT_FOUND): + result_of( + self.client.read_share_chunk( + b"1" * 16, + 1, + 0, + 10, + ) + ) + + def test_read_of_wrong_share_number_fails(self): + """ + Reading from unknown storage index results in 404. + """ + storage_index, _, _ = self.upload(1) + with assert_fails_with_http_code(self, http.NOT_FOUND): + result_of( + self.client.read_share_chunk( + storage_index, + 7, # different share number + 0, + 10, + ) + ) + + def test_read_with_negative_offset_fails(self): + """ + Malformed or unsupported Range headers result in 416 (requested range + not satisfiable) error. + """ + storage_index, _, _ = self.upload(1) + + def check_bad_range(bad_range_value): + client = StorageClientImmutables( + StorageClientWithHeadersOverride( + self.http.client, {"range": bad_range_value} + ) + ) + + with assert_fails_with_http_code( + self, http.REQUESTED_RANGE_NOT_SATISFIABLE + ): + result_of( + client.read_share_chunk( + storage_index, + 1, + 0, + 10, + ) + ) + + # Bad unit + check_bad_range("molluscs=0-9") + # Negative offsets + check_bad_range("bytes=-2-9") + check_bad_range("bytes=0--10") + # Negative offset no endpoint + check_bad_range("bytes=-300-") + check_bad_range("bytes=") + # Multiple ranges are currently unsupported, even if they're + # semantically valid under HTTP: + check_bad_range("bytes=0-5, 6-7") + # Ranges without an end are currently unsupported, even if they're + # semantically valid under HTTP. + check_bad_range("bytes=0-") + + @given(data_length=st.integers(min_value=1, max_value=300000)) + def test_read_with_no_range(self, data_length): + """ + A read with no range returns the whole immutable. + """ + storage_index, uploaded_data, _ = self.upload(1, data_length) + response = result_of( + self.http.client.request( + "GET", + self.http.client.relative_url( + "/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + ), + ) + ) + self.assertEqual(response.code, http.OK) + self.assertEqual(result_of(response.content()), uploaded_data) + + def test_validate_content_range_response_to_read(self): + """ + The server responds to ranged reads with an appropriate Content-Range + header. + """ + storage_index, _, _ = self.upload(1, 26) + + def check_range(requested_range, expected_response): + headers = Headers() + headers.setRawHeaders("range", [requested_range]) + response = result_of( + self.http.client.request( + "GET", + self.http.client.relative_url( + "/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + ), + headers=headers, + ) + ) + self.assertEqual( + response.headers.getRawHeaders("content-range"), [expected_response] + ) + + check_range("bytes=0-10", "bytes 0-10/*") + # Can't go beyond the end of the immutable! + check_range("bytes=10-100", "bytes 10-25/*") + + +class ImmutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): + """Shared tests, running on immutables.""" + + KIND = "immutable" + clientFactory = StorageClientImmutables + + def setUp(self): + super(ImmutableSharedTests, self).setUp() + self.http = self.useFixture(HttpTestFixture()) + self.client = self.clientFactory(self.http.client) + self.general_client = StorageClientGeneral(self.http.client) + + def upload(self, share_number, data_length=26): + """ + Create a share, return (storage_index, uploaded_data). + """ + uploaded_data = (b"abcdefghijklmnopqrstuvwxyz" * ((data_length // 26) + 1))[ + :data_length + ] + upload_secret = urandom(32) + lease_secret = urandom(32) + storage_index = urandom(16) + result_of( + self.client.create( + storage_index, + {share_number}, + data_length, + upload_secret, + lease_secret, + lease_secret, + ) + ) + result_of( + self.client.write_share_chunk( + storage_index, + share_number, + upload_secret, + 0, + uploaded_data, + ) + ) + return storage_index, uploaded_data, lease_secret From 85774ced9526df771ed4b0ec14bc4fe83eaeb1dd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 10:56:37 -0400 Subject: [PATCH 048/289] Run shared tests on mutables too, with appropriate fixes to the tests and the server. --- src/allmydata/storage/http_server.py | 12 ++-- src/allmydata/test/test_storage_http.py | 82 +++++++++++++++++-------- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 9735a0626..46023be72 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -599,7 +599,6 @@ class HTTPServer(object): ) def mutable_read_test_write(self, request, authorization, storage_index): """Read/test/write combined operation for mutables.""" - # TODO unit tests rtw_request = self._read_encoded(request, _SCHEMAS["mutable_read_test_write"]) secrets = ( authorization[Secrets.WRITE_ENABLER], @@ -635,11 +634,13 @@ class HTTPServer(object): ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): """Read a chunk from a mutable.""" - # TODO unit tests def read_data(offset, length): - return self._storage_server.slot_readv( - storage_index, [share_number], [(offset, length)] - )[share_number][0] + try: + return self._storage_server.slot_readv( + storage_index, [share_number], [(offset, length)] + )[share_number][0] + except KeyError: + raise _HTTPError(http.NOT_FOUND) return read_range(request, read_data) @@ -664,7 +665,6 @@ class HTTPServer(object): self, request, authorization, storage_index, share_number ): """Indicate that given share is corrupt, with a text reason.""" - # TODO unit test all the paths if share_number not in { shnum for (shnum, _) in self._storage_server.get_shares(storage_index) }: diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index fc79fbe34..7ed4cd235 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -5,7 +5,7 @@ Tests for HTTP storage client + server. from base64 import b64encode from contextlib import contextmanager from os import urandom -from typing import Union, Callable, Tuple +from typing import Union, Callable, Tuple, Iterable from cbor2 import dumps from pycddl import ValidationError as CDDLValidationError from hypothesis import assume, given, strategies as st @@ -23,6 +23,7 @@ from werkzeug.exceptions import NotFound as WNotFound from .common import SyncTestCase from ..storage.http_common import get_content_type, CBOR_MIME_TYPE from ..storage.common import si_b2a +from ..storage.lease import LeaseInfo from ..storage.server import StorageServer from ..storage.http_server import ( HTTPServer, @@ -1094,20 +1095,6 @@ class MutableHTTPAPIsTests(SyncTestCase): b"abcdef-0", ) - # TODO refactor reads tests so they're shared - - def test_lease_renew_and_add(self): - pass - - def test_lease_on_unknown_storage_index(self): - pass - - def test_advise_corrupt_share(self): - pass - - def test_advise_corrupt_share_unknown(self): - pass - class SharedImmutableMutableTestsMixin: """ @@ -1127,6 +1114,10 @@ class SharedImmutableMutableTestsMixin: """ raise NotImplementedError + def get_leases(self, storage_index: bytes) -> Iterable[LeaseInfo]: + """Get leases for the storage index.""" + raise NotImplementedError() + def test_advise_corrupt_share(self): """ Advising share was corrupted succeeds from HTTP client's perspective, @@ -1160,14 +1151,14 @@ class SharedImmutableMutableTestsMixin: def test_lease_renew_and_add(self): """ - It's possible the renew the lease on an uploaded immutable, by using - the same renewal secret, or add a new lease by choosing a different - renewal secret. + It's possible the renew the lease on an uploaded mutable/immutable, by + using the same renewal secret, or add a new lease by choosing a + different renewal secret. """ # Create a storage index: storage_index, _, lease_secret = self.upload(0) - [lease] = self.http.storage_server.get_leases(storage_index) + [lease] = self.get_leases(storage_index) initial_expiration_time = lease.get_expiration_time() # Time passes: @@ -1191,7 +1182,7 @@ class SharedImmutableMutableTestsMixin: ) ) - [lease1, lease2] = self.http.storage_server.get_leases(storage_index) + [lease1, lease2] = self.get_leases(storage_index) self.assertEqual(lease1.get_expiration_time(), initial_expiration_time + 167) self.assertEqual(lease2.get_expiration_time(), initial_expiration_time + 177) @@ -1232,7 +1223,7 @@ class SharedImmutableMutableTestsMixin: storage_index, _, _ = self.upload(1) def check_bad_range(bad_range_value): - client = StorageClientImmutables( + client = self.clientFactory( StorageClientWithHeadersOverride( self.http.client, {"range": bad_range_value} ) @@ -1268,7 +1259,7 @@ class SharedImmutableMutableTestsMixin: @given(data_length=st.integers(min_value=1, max_value=300000)) def test_read_with_no_range(self, data_length): """ - A read with no range returns the whole immutable. + A read with no range returns the whole mutable/immutable. """ storage_index, uploaded_data, _ = self.upload(1, data_length) response = result_of( @@ -1306,7 +1297,7 @@ class SharedImmutableMutableTestsMixin: ) check_range("bytes=0-10", "bytes 0-10/*") - # Can't go beyond the end of the immutable! + # Can't go beyond the end of the mutable/immutable! check_range("bytes=10-100", "bytes 10-25/*") @@ -1324,7 +1315,7 @@ class ImmutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): def upload(self, share_number, data_length=26): """ - Create a share, return (storage_index, uploaded_data). + Create a share, return (storage_index, uploaded_data, lease_secret). """ uploaded_data = (b"abcdefghijklmnopqrstuvwxyz" * ((data_length // 26) + 1))[ :data_length @@ -1352,3 +1343,46 @@ class ImmutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): ) ) return storage_index, uploaded_data, lease_secret + + def get_leases(self, storage_index): + return self.http.storage_server.get_leases(storage_index) + + +class MutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): + """Shared tests, running on mutables.""" + + KIND = "mutable" + clientFactory = StorageClientMutables + + def setUp(self): + super(MutableSharedTests, self).setUp() + self.http = self.useFixture(HttpTestFixture()) + self.client = self.clientFactory(self.http.client) + self.general_client = StorageClientGeneral(self.http.client) + + def upload(self, share_number, data_length=26): + """ + Create a share, return (storage_index, uploaded_data, lease_secret). + """ + data = (b"abcdefghijklmnopqrstuvwxyz" * ((data_length // 26) + 1))[:data_length] + write_secret = urandom(32) + lease_secret = urandom(32) + storage_index = urandom(16) + result_of( + self.client.read_test_write_chunks( + storage_index, + write_secret, + lease_secret, + lease_secret, + { + share_number: TestWriteVectors( + write_vectors=[WriteVector(offset=0, data=data)] + ), + }, + [], + ) + ) + return storage_index, data, lease_secret + + def get_leases(self, storage_index): + return self.http.storage_server.get_slot_leases(storage_index) From ca0f311861aaefb2ae532abc6299b668d166862e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 10:59:29 -0400 Subject: [PATCH 049/289] News file. --- newsfragments/3896.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3896.minor diff --git a/newsfragments/3896.minor b/newsfragments/3896.minor new file mode 100644 index 000000000..e69de29bb From c3a304e1cc0d2eadd62017cbdea367063ec5bed2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 11:00:07 -0400 Subject: [PATCH 050/289] Lint and mypy fixes. --- src/allmydata/storage/http_client.py | 2 +- src/allmydata/storage/http_server.py | 6 +++--- src/allmydata/test/test_storage_http.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 9711e748d..9203d02ab 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -586,7 +586,7 @@ class StorageClientImmutables(object): ) @inlineCallbacks - def list_shares(self, storage_index): # type: (bytes,) -> Deferred[set[int]] + def list_shares(self, storage_index: bytes) -> Deferred[set[int]]: """ Return the set of shares for a given storage index. """ diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 46023be72..bcad0e972 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -3,7 +3,7 @@ HTTP server for storage. """ from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any, Callable +from typing import Dict, List, Set, Tuple, Any, Callable, Optional from functools import wraps from base64 import b64decode @@ -274,7 +274,7 @@ _SCHEMAS = { } -def read_range(request, read_data: Callable[int, int, bytes]) -> Optional[bytes]: +def read_range(request, read_data: Callable[[int, int], bytes]) -> Optional[bytes]: """ Parse the ``Range`` header, read appropriately, return as result. @@ -298,7 +298,7 @@ def read_range(request, read_data: Callable[int, int, bytes]) -> Optional[bytes] data = read_data(start, start + 65536) if not data: request.finish() - return + return None request.write(data) start += len(data) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 7ed4cd235..5e0b35d88 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -1105,7 +1105,7 @@ class SharedImmutableMutableTestsMixin: general_client: StorageClientGeneral client: Union[StorageClientImmutables, StorageClientMutables] clientFactory: Callable[ - StorageClient, Union[StorageClientImmutables, StorageClientMutables] + [StorageClient], Union[StorageClientImmutables, StorageClientMutables] ] def upload(self, share_number: int, data_length=26) -> Tuple[bytes, bytes, bytes]: From 528d902460ae5bddaf3f140743072b3324fca5b7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 11:15:25 -0400 Subject: [PATCH 051/289] News file. --- newsfragments/3900.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3900.minor diff --git a/newsfragments/3900.minor b/newsfragments/3900.minor new file mode 100644 index 000000000..e69de29bb From 8694543659a36007c0d7a0787808fa119df15931 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 6 Jun 2022 11:15:51 -0400 Subject: [PATCH 052/289] Work with Sphinx 5. --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index af05e5900..cc9a11166 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,7 +63,7 @@ release = u'1.x' # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: From 00381bc24fefc3a831d4f253819d154609266423 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 8 Jun 2022 13:52:45 -0400 Subject: [PATCH 053/289] Correction now that it does more than what it did before. --- src/allmydata/storage/http_server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index bcad0e972..63be2f270 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -276,7 +276,8 @@ _SCHEMAS = { def read_range(request, read_data: Callable[[int, int], bytes]) -> Optional[bytes]: """ - Parse the ``Range`` header, read appropriately, return as result. + Read an optional ``Range`` header, reads data appropriately via the given + callable, return as result. Only parses a subset of ``Range`` headers that we support: must be set, bytes only, only a single range, the end must be explicitly specified. From db426513558bd328fc803c74c382f2ae0a214b92 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 8 Jun 2022 13:55:47 -0400 Subject: [PATCH 054/289] Be more consistent and just always write to the request in `read_range`. --- src/allmydata/storage/http_server.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 63be2f270..543fceb98 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -274,21 +274,20 @@ _SCHEMAS = { } -def read_range(request, read_data: Callable[[int, int], bytes]) -> Optional[bytes]: +def read_range(request, read_data: Callable[[int, int], bytes]) -> None: """ Read an optional ``Range`` header, reads data appropriately via the given - callable, return as result. + callable, writes the data to the request. Only parses a subset of ``Range`` headers that we support: must be set, bytes only, only a single range, the end must be explicitly specified. Raises a ``_HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE)`` if parsing is not possible or the header isn't set. - Returns a result that should be returned from the request handler, and sets - appropriate response headers. - Takes a function that will do the actual reading given the start offset and a length to read. + + The resulting data is written to the request. """ if request.getHeader("range") is None: # Return the whole thing. @@ -299,7 +298,7 @@ def read_range(request, read_data: Callable[[int, int], bytes]) -> Optional[byte data = read_data(start, start + 65536) if not data: request.finish() - return None + return request.write(data) start += len(data) @@ -326,7 +325,8 @@ def read_range(request, read_data: Callable[[int, int], bytes]) -> Optional[byte "content-range", ContentRange("bytes", offset, offset + len(data)).to_header(), ) - return data + request.write(data) + request.finish() class HTTPServer(object): From d37f187c078868833a98a362e528f559b97cdd94 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 8 Jun 2022 13:56:23 -0400 Subject: [PATCH 055/289] Lint fix. --- src/allmydata/storage/http_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 543fceb98..06a6863fa 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -3,7 +3,7 @@ HTTP server for storage. """ from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any, Callable, Optional +from typing import Dict, List, Set, Tuple, Any, Callable from functools import wraps from base64 import b64decode From e1daa192fbe8ff8168392ec9989664ddd4b3905a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 21 Jun 2022 17:20:08 -0400 Subject: [PATCH 056/289] Sketch of protocol switcher experiment. --- src/allmydata/node.py | 2 + src/allmydata/protocol_switch.py | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 src/allmydata/protocol_switch.py diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 3ac4c507b..0547d3fe6 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -51,6 +51,7 @@ from allmydata.util import configutil from allmydata.util.yamlutil import ( safe_load, ) +from .protocol_switch import FoolscapOrHttp from . import ( __full_version__, @@ -707,6 +708,7 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han the new Tub via `Tub.setOption` """ tub = Tub(**kwargs) + tub.negotiationClass = FoolscapOrHttp for (name, value) in list(tub_options.items()): tub.setOption(name, value) handlers = default_connection_handlers.copy() diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py new file mode 100644 index 000000000..59e1b609f --- /dev/null +++ b/src/allmydata/protocol_switch.py @@ -0,0 +1,84 @@ +""" +Support for listening with both HTTP and Foolscap on the same port. +""" + +from enum import Enum +from typing import Optional + +from twisted.internet.protocol import Protocol +from twisted.python.failure import Failure + +from foolscap.negotiate import Negotiation + +class ProtocolMode(Enum): + """Listening mode.""" + UNDECIDED = 0 + FOOLSCAP = 1 + HTTP = 2 + + +class PretendToBeNegotiation(type): + """😱""" + + def __instancecheck__(self, instance): + return (instance.__class__ == self) or isinstance(instance, Negotiation) + + +class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): + """ + Based on initial query, decide whether we're talking Foolscap or HTTP. + + Pretends to be a ``foolscap.negotiate.Negotiation`` instance. + """ + _foolscap : Optional[Negotiation] = None + _protocol_mode : ProtocolMode = ProtocolMode.UNDECIDED + _buffer: bytes = b"" + + def __init__(self, *args, **kwargs): + self._foolscap = Negotiation(*args, **kwargs) + + def __setattr__(self, name, value): + if name in {"_foolscap", "_protocol_mode", "_buffer", "transport"}: + object.__setattr__(self, name, value) + else: + setattr(self._foolscap, name, value) + + def __getattr__(self, name): + return getattr(self._foolscap, name) + + def makeConnection(self, transport): + Protocol.makeConnection(self, transport) + self._foolscap.makeConnection(transport) + + def initServer(self, *args, **kwargs): + return self._foolscap.initServer(*args, **kwargs) + + def initClient(self, *args, **kwargs): + assert not self._buffer + self._protocol_mode = ProtocolMode.FOOLSCAP + return self._foolscap.initClient(*args, **kwargs) + + def dataReceived(self, data: bytes) -> None: + if self._protocol_mode == ProtocolMode.FOOLSCAP: + return self._foolscap.dataReceived(data) + if self._protocol_mode == ProtocolMode.HTTP: + raise NotImplementedError() + + # UNDECIDED mode. + self._buffer += data + if len(self._buffer) < 8: + return + + # Check if it looks like Foolscap request. If so, it can handle this + # and later data: + if self._buffer.startswith(b"GET /id/"): + self._protocol_mode = ProtocolMode.FOOLSCAP + buf, self._buffer = self._buffer, b"" + return self._foolscap.dataReceived(buf) + else: + self._protocol_mode = ProtocolMode.HTTP + raise NotImplementedError("") + + def connectionLost(self, reason: Failure) -> None: + if self._protocol_mode == ProtocolMode.FOOLSCAP: + return self._foolscap.connectionLost(reason) From 7910867be6b8154f2b10031f3790b1a8c5eba821 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 22 Jun 2022 10:23:23 -0400 Subject: [PATCH 057/289] It actually works(?!) now. --- src/allmydata/protocol_switch.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 59e1b609f..fa23738d2 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -10,8 +10,10 @@ from twisted.python.failure import Failure from foolscap.negotiate import Negotiation + class ProtocolMode(Enum): """Listening mode.""" + UNDECIDED = 0 FOOLSCAP = 1 HTTP = 2 @@ -30,15 +32,22 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): Pretends to be a ``foolscap.negotiate.Negotiation`` instance. """ - _foolscap : Optional[Negotiation] = None - _protocol_mode : ProtocolMode = ProtocolMode.UNDECIDED + + _foolscap: Optional[Negotiation] = None + _protocol_mode: ProtocolMode = ProtocolMode.UNDECIDED _buffer: bytes = b"" def __init__(self, *args, **kwargs): self._foolscap = Negotiation(*args, **kwargs) def __setattr__(self, name, value): - if name in {"_foolscap", "_protocol_mode", "_buffer", "transport"}: + if name in { + "_foolscap", + "_protocol_mode", + "_buffer", + "transport", + "__class__", + }: object.__setattr__(self, name, value) else: setattr(self._foolscap, name, value) @@ -50,13 +59,15 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): Protocol.makeConnection(self, transport) self._foolscap.makeConnection(transport) - def initServer(self, *args, **kwargs): - return self._foolscap.initServer(*args, **kwargs) - def initClient(self, *args, **kwargs): + # After creation, a Negotiation instance either has initClient() or + # initServer() called. SInce this is a client, we're never going to do + # HTTP. Relying on __getattr__/__setattr__ doesn't work, for some + # reason, so just mutate ourselves appropriately. assert not self._buffer - self._protocol_mode = ProtocolMode.FOOLSCAP - return self._foolscap.initClient(*args, **kwargs) + self.__class__ = Negotiation + self.__dict__ = self._foolscap.__dict__ + return self.initClient(*args, **kwargs) def dataReceived(self, data: bytes) -> None: if self._protocol_mode == ProtocolMode.FOOLSCAP: @@ -69,7 +80,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): if len(self._buffer) < 8: return - # Check if it looks like Foolscap request. If so, it can handle this + # Check if it looks like a Foolscap request. If so, it can handle this # and later data: if self._buffer.startswith(b"GET /id/"): self._protocol_mode = ProtocolMode.FOOLSCAP From 7577d1e24ca8f8a93a11b3bd87deb251f40cbce8 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 22 Jun 2022 14:19:29 -0400 Subject: [PATCH 058/289] Sketch of HTTP support, still untested WIP. --- src/allmydata/client.py | 8 ++++++++ src/allmydata/node.py | 2 -- src/allmydata/protocol_switch.py | 24 ++++++++++++++++++++++-- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 56ecdc6ed..ad5feb2ed 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -64,6 +64,7 @@ from allmydata.interfaces import ( from allmydata.nodemaker import NodeMaker from allmydata.blacklist import Blacklist from allmydata import node +from .protocol_switch import FoolscapOrHttp KiB=1024 @@ -818,6 +819,13 @@ class _Client(node.Node, pollmixin.PollMixin): if anonymous_storage_enabled(self.config): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) + (_, _, swissnum) = furl.rpartition("/") + class FoolscapOrHttpWithCert(FoolscapOrHttp): + certificate = self.tub.myCertificate + storage_server = ss + swissnum = swissnum + self.tub.negotiationClass = FoolscapOrHttpWithCert + announcement["anonymous-storage-FURL"] = furl enabled_storage_servers = self._enable_storage_servers( diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 0547d3fe6..3ac4c507b 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -51,7 +51,6 @@ from allmydata.util import configutil from allmydata.util.yamlutil import ( safe_load, ) -from .protocol_switch import FoolscapOrHttp from . import ( __full_version__, @@ -708,7 +707,6 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han the new Tub via `Tub.setOption` """ tub = Tub(**kwargs) - tub.negotiationClass = FoolscapOrHttp for (name, value) in list(tub_options.items()): tub.setOption(name, value) handlers = default_connection_handlers.copy() diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index fa23738d2..5a9589c17 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -7,9 +7,14 @@ from typing import Optional from twisted.internet.protocol import Protocol from twisted.python.failure import Failure +from twisted.internet.ssl import CertificateOptions +from twisted.web.server import Site +from twisted.protocols.tls import TLSMemoryBIOFactory from foolscap.negotiate import Negotiation +from .storage.http_server import HTTPServer + class ProtocolMode(Enum): """Listening mode.""" @@ -47,6 +52,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): "_buffer", "transport", "__class__", + "_http", }: object.__setattr__(self, name, value) else: @@ -73,7 +79,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): if self._protocol_mode == ProtocolMode.FOOLSCAP: return self._foolscap.dataReceived(data) if self._protocol_mode == ProtocolMode.HTTP: - raise NotImplementedError() + return self._http.dataReceived(data) # UNDECIDED mode. self._buffer += data @@ -83,12 +89,26 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): # Check if it looks like a Foolscap request. If so, it can handle this # and later data: if self._buffer.startswith(b"GET /id/"): + # TODO or maybe just self.__class__ here too? self._protocol_mode = ProtocolMode.FOOLSCAP buf, self._buffer = self._buffer, b"" return self._foolscap.dataReceived(buf) else: self._protocol_mode = ProtocolMode.HTTP - raise NotImplementedError("") + + certificate_options = CertificateOptions( + privateKey=self.certificate.privateKey.original, + certificate=self.certificate.original, + ) + http_server = HTTPServer(self.storage_server, self.swissnum) + factory = TLSMemoryBIOFactory( + certificate_options, False, Site(http_server.get_resource()) + ) + protocol = factory.buildProtocol(self.transport.getPeer()) + protocol.makeConnection(self.transport) + protocol.dataReceived(self._buffer) + # TODO __getattr__ or maybe change the __class__ + self._http = protocol def connectionLost(self, reason: Failure) -> None: if self._protocol_mode == ProtocolMode.FOOLSCAP: From c5724c1d0a70ad1aa539aa0063a300bc359ddf21 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 22 Jun 2022 14:20:42 -0400 Subject: [PATCH 059/289] Clarify. --- src/allmydata/protocol_switch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 5a9589c17..50a7b1476 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -107,7 +107,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): protocol = factory.buildProtocol(self.transport.getPeer()) protocol.makeConnection(self.transport) protocol.dataReceived(self._buffer) - # TODO __getattr__ or maybe change the __class__ + # TODO maybe change the __class__ self._http = protocol def connectionLost(self, reason: Failure) -> None: From 1579530895c5e66997f95ff8424d26be73dec011 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 07:59:43 -0400 Subject: [PATCH 060/289] Add working HTTP support. --- src/allmydata/client.py | 8 ++------ src/allmydata/node.py | 3 +++ src/allmydata/protocol_switch.py | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index ad5feb2ed..294684b58 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -64,7 +64,7 @@ from allmydata.interfaces import ( from allmydata.nodemaker import NodeMaker from allmydata.blacklist import Blacklist from allmydata import node -from .protocol_switch import FoolscapOrHttp +from .protocol_switch import update_foolscap_or_http_class KiB=1024 @@ -820,11 +820,7 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - class FoolscapOrHttpWithCert(FoolscapOrHttp): - certificate = self.tub.myCertificate - storage_server = ss - swissnum = swissnum - self.tub.negotiationClass = FoolscapOrHttpWithCert + update_foolscap_or_http_class(self.tub.negotiationClass, self.tub.myCertificate, ss, swissnum.encode("ascii")) announcement["anonymous-storage-FURL"] = furl diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 3ac4c507b..93fa6a8e1 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -55,6 +55,8 @@ from allmydata.util.yamlutil import ( from . import ( __full_version__, ) +from .protocol_switch import create_foolscap_or_http_class + def _common_valid_config(): return configutil.ValidConfiguration({ @@ -707,6 +709,7 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han the new Tub via `Tub.setOption` """ tub = Tub(**kwargs) + tub.negotiationClass = create_foolscap_or_http_class() for (name, value) in list(tub_options.items()): tub.setOption(name, value) handlers = default_connection_handlers.copy() diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 50a7b1476..bb1a59bef 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -14,6 +14,7 @@ from twisted.protocols.tls import TLSMemoryBIOFactory from foolscap.negotiate import Negotiation from .storage.http_server import HTTPServer +from .storage.server import StorageServer class ProtocolMode(Enum): @@ -38,6 +39,11 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): Pretends to be a ``foolscap.negotiate.Negotiation`` instance. """ + # These three will be set by a subclass + swissnum: bytes + certificate = None # TODO figure out type + storage_server: StorageServer + _foolscap: Optional[Negotiation] = None _protocol_mode: ProtocolMode = ProtocolMode.UNDECIDED _buffer: bytes = b"" @@ -113,3 +119,16 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): def connectionLost(self, reason: Failure) -> None: if self._protocol_mode == ProtocolMode.FOOLSCAP: return self._foolscap.connectionLost(reason) + + +def create_foolscap_or_http_class(): + class FoolscapOrHttpWithCert(FoolscapOrHttp): + pass + + return FoolscapOrHttpWithCert + + +def update_foolscap_or_http_class(cls, certificate, storage_server, swissnum): + cls.certificate = certificate + cls.storage_server = storage_server + cls.swissnum = swissnum From 04156db74ef28c63fb2273277f3f52b5f6d4883c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:32:43 -0400 Subject: [PATCH 061/289] Delay Negotiation.connectionMade so we don't create unnecessary timeouts. --- src/allmydata/protocol_switch.py | 34 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index bb1a59bef..2f834081b 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -3,10 +3,10 @@ Support for listening with both HTTP and Foolscap on the same port. """ from enum import Enum -from typing import Optional +from typing import Optional, Tuple from twisted.internet.protocol import Protocol -from twisted.python.failure import Failure +from twisted.internet.interfaces import ITransport from twisted.internet.ssl import CertificateOptions from twisted.web.server import Site from twisted.protocols.tls import TLSMemoryBIOFactory @@ -21,8 +21,7 @@ class ProtocolMode(Enum): """Listening mode.""" UNDECIDED = 0 - FOOLSCAP = 1 - HTTP = 2 + HTTP = 1 class PretendToBeNegotiation(type): @@ -67,9 +66,13 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): def __getattr__(self, name): return getattr(self._foolscap, name) - def makeConnection(self, transport): - Protocol.makeConnection(self, transport) - self._foolscap.makeConnection(transport) + def _convert_to_negotiation(self) -> Tuple[bytes, ITransport]: + """Convert self to a ``Negotiation`` instance, return any buffered bytes""" + transport = self.transport + buf = self._buffer + self.__class__ = Negotiation # type: ignore + self.__dict__ = self._foolscap.__dict__ + return buf, transport def initClient(self, *args, **kwargs): # After creation, a Negotiation instance either has initClient() or @@ -77,13 +80,10 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): # HTTP. Relying on __getattr__/__setattr__ doesn't work, for some # reason, so just mutate ourselves appropriately. assert not self._buffer - self.__class__ = Negotiation - self.__dict__ = self._foolscap.__dict__ + self._convert_to_negotiation() return self.initClient(*args, **kwargs) def dataReceived(self, data: bytes) -> None: - if self._protocol_mode == ProtocolMode.FOOLSCAP: - return self._foolscap.dataReceived(data) if self._protocol_mode == ProtocolMode.HTTP: return self._http.dataReceived(data) @@ -95,10 +95,10 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): # Check if it looks like a Foolscap request. If so, it can handle this # and later data: if self._buffer.startswith(b"GET /id/"): - # TODO or maybe just self.__class__ here too? - self._protocol_mode = ProtocolMode.FOOLSCAP - buf, self._buffer = self._buffer, b"" - return self._foolscap.dataReceived(buf) + buf, transport = self._convert_to_negotiation() + self.makeConnection(transport) + self.dataReceived(buf) + return else: self._protocol_mode = ProtocolMode.HTTP @@ -116,10 +116,6 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): # TODO maybe change the __class__ self._http = protocol - def connectionLost(self, reason: Failure) -> None: - if self._protocol_mode == ProtocolMode.FOOLSCAP: - return self._foolscap.connectionLost(reason) - def create_foolscap_or_http_class(): class FoolscapOrHttpWithCert(FoolscapOrHttp): From d86f8519dcdd14622eb5d695b880a77db2038e89 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:41:01 -0400 Subject: [PATCH 062/289] Simplify implementation. --- src/allmydata/protocol_switch.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 2f834081b..11a35c324 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -2,7 +2,6 @@ Support for listening with both HTTP and Foolscap on the same port. """ -from enum import Enum from typing import Optional, Tuple from twisted.internet.protocol import Protocol @@ -17,13 +16,6 @@ from .storage.http_server import HTTPServer from .storage.server import StorageServer -class ProtocolMode(Enum): - """Listening mode.""" - - UNDECIDED = 0 - HTTP = 1 - - class PretendToBeNegotiation(type): """😱""" @@ -43,21 +35,16 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): certificate = None # TODO figure out type storage_server: StorageServer - _foolscap: Optional[Negotiation] = None - _protocol_mode: ProtocolMode = ProtocolMode.UNDECIDED - _buffer: bytes = b"" - def __init__(self, *args, **kwargs): - self._foolscap = Negotiation(*args, **kwargs) + self._foolscap: Negotiation = Negotiation(*args, **kwargs) + self._buffer: bytes = b"" def __setattr__(self, name, value): if name in { "_foolscap", - "_protocol_mode", "_buffer", "transport", "__class__", - "_http", }: object.__setattr__(self, name, value) else: @@ -66,7 +53,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): def __getattr__(self, name): return getattr(self._foolscap, name) - def _convert_to_negotiation(self) -> Tuple[bytes, ITransport]: + def _convert_to_negotiation(self) -> Tuple[bytes, Optional[ITransport]]: """Convert self to a ``Negotiation`` instance, return any buffered bytes""" transport = self.transport buf = self._buffer @@ -84,10 +71,11 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): return self.initClient(*args, **kwargs) def dataReceived(self, data: bytes) -> None: - if self._protocol_mode == ProtocolMode.HTTP: - return self._http.dataReceived(data) + """Handle incoming data. - # UNDECIDED mode. + Once we've decided which protocol we are, update self.__class__, at + which point all methods will be called on the new class. + """ self._buffer += data if len(self._buffer) < 8: return @@ -100,8 +88,6 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): self.dataReceived(buf) return else: - self._protocol_mode = ProtocolMode.HTTP - certificate_options = CertificateOptions( privateKey=self.certificate.privateKey.original, certificate=self.certificate.original, @@ -113,8 +99,8 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): protocol = factory.buildProtocol(self.transport.getPeer()) protocol.makeConnection(self.transport) protocol.dataReceived(self._buffer) - # TODO maybe change the __class__ - self._http = protocol + self.__class__ = protocol.__class__ + self.__dict__ = protocol.__dict__ def create_foolscap_or_http_class(): From 026d63cd6a83f274fb1336945afe5145f0afc226 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:41:47 -0400 Subject: [PATCH 063/289] Fix some mypy warnings. --- src/allmydata/protocol_switch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 11a35c324..f3a624318 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -6,7 +6,7 @@ from typing import Optional, Tuple from twisted.internet.protocol import Protocol from twisted.internet.interfaces import ITransport -from twisted.internet.ssl import CertificateOptions +from twisted.internet.ssl import CertificateOptions, PrivateCertificate from twisted.web.server import Site from twisted.protocols.tls import TLSMemoryBIOFactory @@ -32,7 +32,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): # These three will be set by a subclass swissnum: bytes - certificate = None # TODO figure out type + certificate: PrivateCertificate storage_server: StorageServer def __init__(self, *args, **kwargs): @@ -96,6 +96,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): factory = TLSMemoryBIOFactory( certificate_options, False, Site(http_server.get_resource()) ) + assert self.transport is not None protocol = factory.buildProtocol(self.transport.getPeer()) protocol.makeConnection(self.transport) protocol.dataReceived(self._buffer) From d70f583172e409268cd83c58d935815ec70296b4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:43:46 -0400 Subject: [PATCH 064/289] More cleanups. --- src/allmydata/protocol_switch.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index f3a624318..23d7dda84 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -30,7 +30,8 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): Pretends to be a ``foolscap.negotiate.Negotiation`` instance. """ - # These three will be set by a subclass + # These three will be set by a subclass in update_foolscap_or_http_class() + # below. swissnum: bytes certificate: PrivateCertificate storage_server: StorageServer @@ -53,19 +54,18 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): def __getattr__(self, name): return getattr(self._foolscap, name) - def _convert_to_negotiation(self) -> Tuple[bytes, Optional[ITransport]]: - """Convert self to a ``Negotiation`` instance, return any buffered bytes""" - transport = self.transport - buf = self._buffer + def _convert_to_negotiation(self): + """ + Convert self to a ``Negotiation`` instance, return any buffered + bytes and the transport if any. + """ self.__class__ = Negotiation # type: ignore self.__dict__ = self._foolscap.__dict__ - return buf, transport def initClient(self, *args, **kwargs): # After creation, a Negotiation instance either has initClient() or - # initServer() called. SInce this is a client, we're never going to do - # HTTP. Relying on __getattr__/__setattr__ doesn't work, for some - # reason, so just mutate ourselves appropriately. + # initServer() called. Since this is a client, we're never going to do + # HTTP, so we can immediately become a Negotiation instance. assert not self._buffer self._convert_to_negotiation() return self.initClient(*args, **kwargs) @@ -83,7 +83,9 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): # Check if it looks like a Foolscap request. If so, it can handle this # and later data: if self._buffer.startswith(b"GET /id/"): - buf, transport = self._convert_to_negotiation() + transport = self.transport + buf = self._buffer + self._convert_to_negotiation() self.makeConnection(transport) self.dataReceived(buf) return From 0c99a9f7b0b14d340586d42cb589d3c888c3db1d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:44:17 -0400 Subject: [PATCH 065/289] Make it more accurate. --- src/allmydata/protocol_switch.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 23d7dda84..899c1258f 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -56,8 +56,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): def _convert_to_negotiation(self): """ - Convert self to a ``Negotiation`` instance, return any buffered - bytes and the transport if any. + Convert self to a ``Negotiation`` instance. """ self.__class__ = Negotiation # type: ignore self.__dict__ = self._foolscap.__dict__ From eb1e48bcc367e78945024cc642a59693aa3ecf09 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:47:33 -0400 Subject: [PATCH 066/289] Add a timeout. --- src/allmydata/protocol_switch.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 899c1258f..2d2590977 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -2,13 +2,14 @@ Support for listening with both HTTP and Foolscap on the same port. """ -from typing import Optional, Tuple +from typing import Optional from twisted.internet.protocol import Protocol -from twisted.internet.interfaces import ITransport +from twisted.internet.interfaces import IDelayedCall from twisted.internet.ssl import CertificateOptions, PrivateCertificate from twisted.web.server import Site from twisted.protocols.tls import TLSMemoryBIOFactory +from twisted.internet import reactor from foolscap.negotiate import Negotiation @@ -36,6 +37,8 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): certificate: PrivateCertificate storage_server: StorageServer + _timeout: IDelayedCall + def __init__(self, *args, **kwargs): self._foolscap: Negotiation = Negotiation(*args, **kwargs) self._buffer: bytes = b"" @@ -69,6 +72,9 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): self._convert_to_negotiation() return self.initClient(*args, **kwargs) + def connectionMade(self): + self._timeout = reactor.callLater(30, self.transport.abortConnection) + def dataReceived(self, data: bytes) -> None: """Handle incoming data. @@ -80,7 +86,8 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): return # Check if it looks like a Foolscap request. If so, it can handle this - # and later data: + # and later data, otherwise assume HTTPS. + self._timeout.cancel() if self._buffer.startswith(b"GET /id/"): transport = self.transport buf = self._buffer From 01d8cc7ab66745d4371820334619f3ecd4ca2881 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:49:07 -0400 Subject: [PATCH 067/289] Put the attribute on the correct object. --- src/allmydata/protocol_switch.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 2d2590977..d26bad745 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -44,12 +44,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): self._buffer: bytes = b"" def __setattr__(self, name, value): - if name in { - "_foolscap", - "_buffer", - "transport", - "__class__", - }: + if name in {"_foolscap", "_buffer", "transport", "__class__", "_timeout"}: object.__setattr__(self, name, value) else: setattr(self._foolscap, name, value) From 1154371d22abd859a48efdee8eee9146b3164b1c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 23 Jun 2022 12:51:07 -0400 Subject: [PATCH 068/289] Clarifying comments. --- src/allmydata/protocol_switch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index d26bad745..9b4e30671 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -84,6 +84,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): # and later data, otherwise assume HTTPS. self._timeout.cancel() if self._buffer.startswith(b"GET /id/"): + # We're a Foolscap Negotiation server protocol instance: transport = self.transport buf = self._buffer self._convert_to_negotiation() @@ -91,6 +92,7 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): self.dataReceived(buf) return else: + # We're a HTTPS protocol instance, serving the storage protocol: certificate_options = CertificateOptions( privateKey=self.certificate.privateKey.original, certificate=self.certificate.original, From bfd54dc6eadf4e012c3dbf32a2356243c0aa505c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 27 Jun 2022 11:30:49 -0400 Subject: [PATCH 069/289] Switch to newer attrs API, for consistency across the module. --- src/allmydata/storage/http_server.py | 31 ++++++++++++---------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 06a6863fa..ebd2323ef 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -19,7 +19,7 @@ from twisted.web.server import Site from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.python.filepath import FilePath -import attr +from attrs import define, field from werkzeug.http import ( parse_range_header, parse_content_range_header, @@ -137,31 +137,31 @@ def _authorized_route(app, required_secrets, *route_args, **route_kwargs): return decorator -@attr.s +@define class StorageIndexUploads(object): """ In-progress upload to storage index. """ # Map share number to BucketWriter - shares = attr.ib(factory=dict) # type: Dict[int,BucketWriter] + shares: dict[int, BucketWriter] = field(factory=dict) # Map share number to the upload secret (different shares might have # different upload secrets). - upload_secrets = attr.ib(factory=dict) # type: Dict[int,bytes] + upload_secrets: dict[int, bytes] = field(factory=dict) -@attr.s +@define class UploadsInProgress(object): """ Keep track of uploads for storage indexes. """ # Map storage index to corresponding uploads-in-progress - _uploads = attr.ib(type=Dict[bytes, StorageIndexUploads], factory=dict) + _uploads: dict[bytes, StorageIndexUploads] = field(factory=dict) # Map BucketWriter to (storage index, share number) - _bucketwriters = attr.ib(type=Dict[BucketWriter, Tuple[bytes, int]], factory=dict) + _bucketwriters: dict[BucketWriter, Tuple[bytes, int]] = field(factory=dict) def add_write_bucket( self, @@ -445,10 +445,7 @@ class HTTPServer(object): return self._send_encoded( request, - { - "already-have": set(already_got), - "allocated": set(sharenum_to_bucket), - }, + {"already-have": set(already_got), "allocated": set(sharenum_to_bucket)}, ) @_authorized_route( @@ -635,6 +632,7 @@ class HTTPServer(object): ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): """Read a chunk from a mutable.""" + def read_data(offset, length): try: return self._storage_server.slot_readv( @@ -646,10 +644,7 @@ class HTTPServer(object): return read_range(request, read_data) @_authorized_route( - _app, - set(), - "/v1/mutable//shares", - methods=["GET"], + _app, set(), "/v1/mutable//shares", methods=["GET"] ) def enumerate_mutable_shares(self, request, authorization, storage_index): """List mutable shares for a storage index.""" @@ -679,7 +674,7 @@ class HTTPServer(object): @implementer(IStreamServerEndpoint) -@attr.s +@define class _TLSEndpointWrapper(object): """ Wrap an existing endpoint with the server-side storage TLS policy. This is @@ -687,8 +682,8 @@ class _TLSEndpointWrapper(object): example there's Tor and i2p. """ - endpoint = attr.ib(type=IStreamServerEndpoint) - context_factory = attr.ib(type=CertificateOptions) + endpoint: IStreamServerEndpoint + context_factory: CertificateOptions @classmethod def from_paths( From 06eca79263382fab3b742d1d3243463735bc79f6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 27 Jun 2022 14:03:05 -0400 Subject: [PATCH 070/289] Minimal streaming implementation. --- src/allmydata/storage/http_server.py | 55 ++++++++++++++++++------- src/allmydata/test/test_storage_http.py | 21 ++++++++-- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index ebd2323ef..b8887bb4e 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -12,10 +12,15 @@ import binascii from zope.interface import implementer from klein import Klein from twisted.web import http -from twisted.internet.interfaces import IListeningPort, IStreamServerEndpoint +from twisted.web.server import NOT_DONE_YET +from twisted.internet.interfaces import ( + IListeningPort, + IStreamServerEndpoint, + IPullProducer, +) from twisted.internet.defer import Deferred from twisted.internet.ssl import CertificateOptions, Certificate, PrivateCertificate -from twisted.web.server import Site +from twisted.web.server import Site, Request from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.python.filepath import FilePath @@ -274,7 +279,37 @@ _SCHEMAS = { } -def read_range(request, read_data: Callable[[int, int], bytes]) -> None: +@implementer(IPullProducer) +@define +class _ReadProducer: + """ + Producer that calls a read function, and writes to a request. + """ + + request: Request + read_data: Callable[[int, int], bytes] + result: Deferred + start: int = field(default=0) + + def resumeProducing(self): + data = self.read_data(self.start, self.start + 65536) + if not data: + self.request.unregisterProducer() + d = self.result + del self.result + d.callback(b"") + return + self.request.write(data) + self.start += len(data) + + def pauseProducing(self): + pass + + def stopProducing(self): + pass + + +def read_range(request: Request, read_data: Callable[[int, int], bytes]) -> None: """ Read an optional ``Range`` header, reads data appropriately via the given callable, writes the data to the request. @@ -290,17 +325,9 @@ def read_range(request, read_data: Callable[[int, int], bytes]) -> None: The resulting data is written to the request. """ if request.getHeader("range") is None: - # Return the whole thing. - start = 0 - while True: - # TODO should probably yield to event loop occasionally... - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - data = read_data(start, start + 65536) - if not data: - request.finish() - return - request.write(data) - start += len(data) + d = Deferred() + request.registerProducer(_ReadProducer(request, read_data, d), False) + return d range_header = parse_range_header(request.getHeader("range")) if ( diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 5e0b35d88..23d9bc276 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -6,6 +6,7 @@ from base64 import b64encode from contextlib import contextmanager from os import urandom from typing import Union, Callable, Tuple, Iterable +from time import sleep, time from cbor2 import dumps from pycddl import ValidationError as CDDLValidationError from hypothesis import assume, given, strategies as st @@ -14,7 +15,8 @@ from treq.testing import StubTreq from klein import Klein from hyperlink import DecodedURL from collections_extended import RangeMap -from twisted.internet.task import Clock +from twisted.internet.task import Clock, Cooperator +from twisted.internet import task from twisted.web import http from twisted.web.http_headers import Headers from werkzeug import routing @@ -316,10 +318,11 @@ class HttpTestFixture(Fixture): self.tempdir.path, b"\x00" * 20, clock=self.clock ) self.http_server = HTTPServer(self.storage_server, SWISSNUM_FOR_TEST) + self.treq = StubTreq(self.http_server.get_resource()) self.client = StorageClient( DecodedURL.from_text("http://127.0.0.1"), SWISSNUM_FOR_TEST, - treq=StubTreq(self.http_server.get_resource()), + treq=self.treq, ) @@ -1261,8 +1264,20 @@ class SharedImmutableMutableTestsMixin: """ A read with no range returns the whole mutable/immutable. """ + self.patch( + task, + "_theCooperator", + Cooperator(scheduler=lambda c: self.http.clock.callLater(0.000001, c)), + ) + + def result_of_with_flush(d): + for i in range(100): + self.http.clock.advance(0.001) + self.http.treq.flush() + return result_of(d) + storage_index, uploaded_data, _ = self.upload(1, data_length) - response = result_of( + response = result_of_with_flush( self.http.client.request( "GET", self.http.client.relative_url( From 6dd2b2d58357f30e7b663008e1f68ad798846f91 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 27 Jun 2022 14:44:51 -0400 Subject: [PATCH 071/289] More streaming, with tests passing again. --- src/allmydata/storage/http_server.py | 88 ++++++++++++++++++++----- src/allmydata/test/test_storage_http.py | 74 +++++++++++++-------- 2 files changed, 115 insertions(+), 47 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index b8887bb4e..a91b7963e 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -281,9 +281,10 @@ _SCHEMAS = { @implementer(IPullProducer) @define -class _ReadProducer: +class _ReadAllProducer: """ - Producer that calls a read function, and writes to a request. + Producer that calls a read function repeatedly to read all the data, and + writes to a request. """ request: Request @@ -292,7 +293,7 @@ class _ReadProducer: start: int = field(default=0) def resumeProducing(self): - data = self.read_data(self.start, self.start + 65536) + data = self.read_data(self.start, 65536) if not data: self.request.unregisterProducer() d = self.result @@ -309,6 +310,52 @@ class _ReadProducer: pass +@implementer(IPullProducer) +@define +class _ReadRangeProducer: + """ + Producer that calls a read function to read a range of data, and writes to + a request. + """ + + request: Request + read_data: Callable[[int, int], bytes] + result: Deferred + start: int + remaining: int + first_read: bool = field(default=True) + + def resumeProducing(self): + to_read = min(self.remaining, 65536) + data = self.read_data(self.start, to_read) + assert len(data) <= to_read + if self.first_read and data: + # For empty bodies the content-range header makes no sense since + # the end of the range is inclusive. + self.request.setHeader( + "content-range", + ContentRange("bytes", self.start, self.start + len(data)).to_header(), + ) + self.request.write(data) + + if not data or len(data) < to_read: + self.request.unregisterProducer() + d = self.result + del self.result + d.callback(b"") + return + + self.start += len(data) + self.remaining -= len(data) + assert self.remaining >= 0 + + def pauseProducing(self): + pass + + def stopProducing(self): + pass + + def read_range(request: Request, read_data: Callable[[int, int], bytes]) -> None: """ Read an optional ``Range`` header, reads data appropriately via the given @@ -324,9 +371,20 @@ def read_range(request: Request, read_data: Callable[[int, int], bytes]) -> None The resulting data is written to the request. """ + + def read_data_with_error_handling(offset: int, length: int) -> bytes: + try: + return read_data(offset, length) + except _HTTPError as e: + request.setResponseCode(e.code) + # Empty read means we're done. + return b"" + if request.getHeader("range") is None: d = Deferred() - request.registerProducer(_ReadProducer(request, read_data, d), False) + request.registerProducer( + _ReadAllProducer(request, read_data_with_error_handling, d), False + ) return d range_header = parse_range_header(request.getHeader("range")) @@ -339,21 +397,15 @@ def read_range(request: Request, read_data: Callable[[int, int], bytes]) -> None raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE) offset, end = range_header.ranges[0] - - # TODO limit memory usage - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - data = read_data(offset, end - offset) - request.setResponseCode(http.PARTIAL_CONTENT) - if len(data): - # For empty bodies the content-range header makes no sense since - # the end of the range is inclusive. - request.setHeader( - "content-range", - ContentRange("bytes", offset, offset + len(data)).to_header(), - ) - request.write(data) - request.finish() + d = Deferred() + request.registerProducer( + _ReadRangeProducer( + request, read_data_with_error_handling, d, offset, end - offset + ), + False, + ) + return d class HTTPServer(object): diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 23d9bc276..2382211df 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -10,7 +10,7 @@ from time import sleep, time from cbor2 import dumps from pycddl import ValidationError as CDDLValidationError from hypothesis import assume, given, strategies as st -from fixtures import Fixture, TempDir +from fixtures import Fixture, TempDir, MockPatch from treq.testing import StubTreq from klein import Klein from hyperlink import DecodedURL @@ -314,6 +314,12 @@ class HttpTestFixture(Fixture): def _setUp(self): self.clock = Clock() self.tempdir = self.useFixture(TempDir()) + self.mock = self.useFixture( + MockPatch( + "twisted.internet.task._theCooperator", + Cooperator(scheduler=lambda c: self.clock.callLater(0.000001, c)), + ) + ) self.storage_server = StorageServer( self.tempdir.path, b"\x00" * 20, clock=self.clock ) @@ -325,6 +331,12 @@ class HttpTestFixture(Fixture): treq=self.treq, ) + def result_of_with_flush(self, d): + for i in range(100): + self.clock.advance(0.001) + self.treq.flush() + return result_of(d) + class StorageClientWithHeadersOverride(object): """Wrap ``StorageClient`` and override sent headers.""" @@ -548,7 +560,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # We can now read: for offset, length in [(0, 100), (10, 19), (99, 1), (49, 200)]: - downloaded = result_of( + downloaded = self.http.result_of_with_flush( self.imm_client.read_share_chunk(storage_index, 1, offset, length) ) self.assertEqual(downloaded, expected_data[offset : offset + length]) @@ -623,7 +635,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # The upload of share 1 succeeded, demonstrating that second create() # call didn't overwrite work-in-progress. - downloaded = result_of( + downloaded = self.http.result_of_with_flush( self.imm_client.read_share_chunk(storage_index, 1, 0, 100) ) self.assertEqual(downloaded, b"a" * 50 + b"b" * 50) @@ -753,11 +765,15 @@ class ImmutableHTTPAPITests(SyncTestCase): ) ) self.assertEqual( - result_of(self.imm_client.read_share_chunk(storage_index, 1, 0, 10)), + self.http.result_of_with_flush( + self.imm_client.read_share_chunk(storage_index, 1, 0, 10) + ), b"1" * 10, ) self.assertEqual( - result_of(self.imm_client.read_share_chunk(storage_index, 2, 0, 10)), + self.http.result_of_with_flush( + self.imm_client.read_share_chunk(storage_index, 2, 0, 10) + ), b"2" * 10, ) @@ -921,7 +937,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # Abort didn't prevent reading: self.assertEqual( uploaded_data, - result_of( + self.http.result_of_with_flush( self.imm_client.read_share_chunk( storage_index, 0, @@ -986,8 +1002,12 @@ class MutableHTTPAPIsTests(SyncTestCase): Written data can be read using ``read_share_chunk``. """ storage_index, _, _ = self.create_upload() - data0 = result_of(self.mut_client.read_share_chunk(storage_index, 0, 1, 7)) - data1 = result_of(self.mut_client.read_share_chunk(storage_index, 1, 0, 8)) + data0 = self.http.result_of_with_flush( + self.mut_client.read_share_chunk(storage_index, 0, 1, 7) + ) + data1 = self.http.result_of_with_flush( + self.mut_client.read_share_chunk(storage_index, 1, 0, 8) + ) self.assertEqual((data0, data1), (b"bcdef-0", b"abcdef-1")) def test_read_before_write(self): @@ -1015,8 +1035,12 @@ class MutableHTTPAPIsTests(SyncTestCase): ), ) # But the write did happen: - data0 = result_of(self.mut_client.read_share_chunk(storage_index, 0, 0, 8)) - data1 = result_of(self.mut_client.read_share_chunk(storage_index, 1, 0, 8)) + data0 = self.http.result_of_with_flush( + self.mut_client.read_share_chunk(storage_index, 0, 0, 8) + ) + data1 = self.http.result_of_with_flush( + self.mut_client.read_share_chunk(storage_index, 1, 0, 8) + ) self.assertEqual((data0, data1), (b"aXYZef-0", b"abcdef-1")) def test_conditional_write(self): @@ -1057,7 +1081,9 @@ class MutableHTTPAPIsTests(SyncTestCase): ) self.assertTrue(result.success) self.assertEqual( - result_of(self.mut_client.read_share_chunk(storage_index, 0, 0, 8)), + self.http.result_of_with_flush( + self.mut_client.read_share_chunk(storage_index, 0, 0, 8) + ), b"aXYZef-0", ) @@ -1094,7 +1120,9 @@ class MutableHTTPAPIsTests(SyncTestCase): # The write did not happen: self.assertEqual( - result_of(self.mut_client.read_share_chunk(storage_index, 0, 0, 8)), + self.http.result_of_with_flush( + self.mut_client.read_share_chunk(storage_index, 0, 0, 8) + ), b"abcdef-0", ) @@ -1194,7 +1222,7 @@ class SharedImmutableMutableTestsMixin: Reading from unknown storage index results in 404. """ with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of( + self.http.result_of_with_flush( self.client.read_share_chunk( b"1" * 16, 1, @@ -1209,7 +1237,7 @@ class SharedImmutableMutableTestsMixin: """ storage_index, _, _ = self.upload(1) with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of( + self.http.result_of_with_flush( self.client.read_share_chunk( storage_index, 7, # different share number @@ -1235,7 +1263,7 @@ class SharedImmutableMutableTestsMixin: with assert_fails_with_http_code( self, http.REQUESTED_RANGE_NOT_SATISFIABLE ): - result_of( + self.http.result_of_with_flush( client.read_share_chunk( storage_index, 1, @@ -1264,20 +1292,8 @@ class SharedImmutableMutableTestsMixin: """ A read with no range returns the whole mutable/immutable. """ - self.patch( - task, - "_theCooperator", - Cooperator(scheduler=lambda c: self.http.clock.callLater(0.000001, c)), - ) - - def result_of_with_flush(d): - for i in range(100): - self.http.clock.advance(0.001) - self.http.treq.flush() - return result_of(d) - storage_index, uploaded_data, _ = self.upload(1, data_length) - response = result_of_with_flush( + response = self.http.result_of_with_flush( self.http.client.request( "GET", self.http.client.relative_url( @@ -1298,7 +1314,7 @@ class SharedImmutableMutableTestsMixin: def check_range(requested_range, expected_response): headers = Headers() headers.setRawHeaders("range", [requested_range]) - response = result_of( + response = self.http.result_of_with_flush( self.http.client.request( "GET", self.http.client.relative_url( From 75f33022cd201f2477b86af9c22641f3c69a2188 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 27 Jun 2022 17:00:41 -0400 Subject: [PATCH 072/289] News file. --- newsfragments/3872.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3872.minor diff --git a/newsfragments/3872.minor b/newsfragments/3872.minor new file mode 100644 index 000000000..e69de29bb From efe9575d28dc18089525e9004159ddbe291997d0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 29 Jun 2022 10:51:35 -0400 Subject: [PATCH 073/289] Nicer testing infrastructure so you don't have to switch back and forth between sync and async test APIs. --- src/allmydata/test/test_storage_http.py | 171 ++++++++++++++++-------- 1 file changed, 117 insertions(+), 54 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 2382211df..1f860cca0 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -1,5 +1,21 @@ """ Tests for HTTP storage client + server. + +The tests here are synchronous and don't involve running a real reactor. This +works, but has some caveats when it comes to testing HTTP endpoints: + +* Some HTTP endpoints are synchronous, some are not. +* For synchronous endpoints, the result is immediately available on the + ``Deferred`` coming out of ``StubTreq``. +* For asynchronous endpoints, you need to use ``StubTreq.flush()`` and + iterate the fake in-memory clock/reactor to advance time . + +So for HTTP endpoints, you should use ``HttpTestFixture.result_of_with_flush()`` +which handles both, and patches and moves forward the global Twisted +``Cooperator`` since that is used to drive pull producers. This is, +sadly, an internal implementation detail of Twisted being leaked to tests... + +For definitely synchronous calls, you can just use ``result_of()``. """ from base64 import b64encode @@ -332,10 +348,33 @@ class HttpTestFixture(Fixture): ) def result_of_with_flush(self, d): + """ + Like ``result_of``, but supports fake reactor and ``treq`` testing + infrastructure necessary to support asynchronous HTTP server endpoints. + """ + result = [] + error = [] + d.addCallbacks(result.append, error.append) + + # Check for synchronous HTTP endpoint handler: + if result: + return result[0] + if error: + error[0].raiseException() + + # OK, no result yet, probably async HTTP endpoint handler, so advance + # time, flush treq, and try again: for i in range(100): self.clock.advance(0.001) self.treq.flush() - return result_of(d) + if result: + return result[0] + if error: + error[0].raiseException() + raise RuntimeError( + "We expected given Deferred to have result already, but it wasn't. " + + "This is probably a test design issue." + ) class StorageClientWithHeadersOverride(object): @@ -393,7 +432,7 @@ class GenericHTTPAPITests(SyncTestCase): ) ) with assert_fails_with_http_code(self, http.UNAUTHORIZED): - result_of(client.get_version()) + self.http.result_of_with_flush(client.get_version()) def test_unsupported_mime_type(self): """ @@ -404,7 +443,7 @@ class GenericHTTPAPITests(SyncTestCase): StorageClientWithHeadersOverride(self.http.client, {"accept": "image/gif"}) ) with assert_fails_with_http_code(self, http.NOT_ACCEPTABLE): - result_of(client.get_version()) + self.http.result_of_with_flush(client.get_version()) def test_version(self): """ @@ -414,7 +453,7 @@ class GenericHTTPAPITests(SyncTestCase): might change across calls. """ client = StorageClientGeneral(self.http.client) - version = result_of(client.get_version()) + version = self.http.result_of_with_flush(client.get_version()) version[b"http://allmydata.org/tahoe/protocols/storage/v1"].pop( b"available-space" ) @@ -448,7 +487,7 @@ class GenericHTTPAPITests(SyncTestCase): ) message = {"bad-message": "missing expected keys"} - response = result_of( + response = self.http.result_of_with_flush( self.http.client.request( "POST", url, @@ -481,7 +520,7 @@ class ImmutableHTTPAPITests(SyncTestCase): upload_secret = urandom(32) lease_secret = urandom(32) storage_index = urandom(16) - created = result_of( + created = self.http.result_of_with_flush( self.imm_client.create( storage_index, share_numbers, @@ -525,35 +564,35 @@ class ImmutableHTTPAPITests(SyncTestCase): expected_data[offset : offset + length], ) - upload_progress = result_of(write(10, 10)) + upload_progress = self.http.result_of_with_flush(write(10, 10)) self.assertEqual( upload_progress, UploadProgress(finished=False, required=remaining) ) - upload_progress = result_of(write(30, 10)) + upload_progress = self.http.result_of_with_flush(write(30, 10)) self.assertEqual( upload_progress, UploadProgress(finished=False, required=remaining) ) - upload_progress = result_of(write(50, 10)) + upload_progress = self.http.result_of_with_flush(write(50, 10)) self.assertEqual( upload_progress, UploadProgress(finished=False, required=remaining) ) # Then, an overlapping write with matching data (15-35): - upload_progress = result_of(write(15, 20)) + upload_progress = self.http.result_of_with_flush(write(15, 20)) self.assertEqual( upload_progress, UploadProgress(finished=False, required=remaining) ) # Now fill in the holes: - upload_progress = result_of(write(0, 10)) + upload_progress = self.http.result_of_with_flush(write(0, 10)) self.assertEqual( upload_progress, UploadProgress(finished=False, required=remaining) ) - upload_progress = result_of(write(40, 10)) + upload_progress = self.http.result_of_with_flush(write(40, 10)) self.assertEqual( upload_progress, UploadProgress(finished=False, required=remaining) ) - upload_progress = result_of(write(60, 40)) + upload_progress = self.http.result_of_with_flush(write(60, 40)) self.assertEqual( upload_progress, UploadProgress(finished=True, required=RangeMap()) ) @@ -572,7 +611,7 @@ class ImmutableHTTPAPITests(SyncTestCase): """ (upload_secret, _, storage_index, _) = self.create_upload({1}, 100) with assert_fails_with_http_code(self, http.UNAUTHORIZED): - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -594,7 +633,7 @@ class ImmutableHTTPAPITests(SyncTestCase): ) # Write half of share 1 - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -608,7 +647,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # existing shares, this call shouldn't overwrite the existing # work-in-progress. upload_secret2 = b"x" * 2 - created2 = result_of( + created2 = self.http.result_of_with_flush( self.imm_client.create( storage_index, {1, 4, 6}, @@ -622,7 +661,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # Write second half of share 1 self.assertTrue( - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -642,7 +681,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # We can successfully upload the shares created with the second upload secret. self.assertTrue( - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 4, @@ -660,11 +699,14 @@ class ImmutableHTTPAPITests(SyncTestCase): (upload_secret, _, storage_index, created) = self.create_upload({1, 2, 3}, 10) # Initially there are no shares: - self.assertEqual(result_of(self.imm_client.list_shares(storage_index)), set()) + self.assertEqual( + self.http.result_of_with_flush(self.imm_client.list_shares(storage_index)), + set(), + ) # Upload shares 1 and 3: for share_number in [1, 3]: - progress = result_of( + progress = self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, share_number, @@ -676,7 +718,10 @@ class ImmutableHTTPAPITests(SyncTestCase): self.assertTrue(progress.finished) # Now shares 1 and 3 exist: - self.assertEqual(result_of(self.imm_client.list_shares(storage_index)), {1, 3}) + self.assertEqual( + self.http.result_of_with_flush(self.imm_client.list_shares(storage_index)), + {1, 3}, + ) def test_upload_bad_content_range(self): """ @@ -694,7 +739,7 @@ class ImmutableHTTPAPITests(SyncTestCase): with assert_fails_with_http_code( self, http.REQUESTED_RANGE_NOT_SATISFIABLE ): - result_of( + self.http.result_of_with_flush( client.write_share_chunk( storage_index, 1, @@ -714,7 +759,10 @@ class ImmutableHTTPAPITests(SyncTestCase): Listing unknown storage index's shares results in empty list of shares. """ storage_index = bytes(range(16)) - self.assertEqual(result_of(self.imm_client.list_shares(storage_index)), set()) + self.assertEqual( + self.http.result_of_with_flush(self.imm_client.list_shares(storage_index)), + set(), + ) def test_upload_non_existent_storage_index(self): """ @@ -725,7 +773,7 @@ class ImmutableHTTPAPITests(SyncTestCase): def unknown_check(storage_index, share_number): with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, share_number, @@ -746,7 +794,7 @@ class ImmutableHTTPAPITests(SyncTestCase): stored separately and can be downloaded separately. """ (upload_secret, _, storage_index, _) = self.create_upload({1, 2}, 10) - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -755,7 +803,7 @@ class ImmutableHTTPAPITests(SyncTestCase): b"1" * 10, ) ) - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 2, @@ -785,7 +833,7 @@ class ImmutableHTTPAPITests(SyncTestCase): (upload_secret, _, storage_index, created) = self.create_upload({1}, 100) # Write: - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -797,7 +845,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # Conflicting write: with assert_fails_with_http_code(self, http.CONFLICT): - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -823,7 +871,7 @@ class ImmutableHTTPAPITests(SyncTestCase): """ def abort(storage_index, share_number, upload_secret): - return result_of( + return self.http.result_of_with_flush( self.imm_client.abort_upload(storage_index, share_number, upload_secret) ) @@ -836,7 +884,7 @@ class ImmutableHTTPAPITests(SyncTestCase): """ # Start an upload: (upload_secret, _, storage_index, _) = self.create_upload({1}, 100) - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -855,7 +903,7 @@ class ImmutableHTTPAPITests(SyncTestCase): # complaint: upload_secret = urandom(32) lease_secret = urandom(32) - created = result_of( + created = self.http.result_of_with_flush( self.imm_client.create( storage_index, {1}, @@ -868,7 +916,7 @@ class ImmutableHTTPAPITests(SyncTestCase): self.assertEqual(created.allocated, {1}) # And write to it, too: - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -887,7 +935,9 @@ class ImmutableHTTPAPITests(SyncTestCase): for si, num in [(storage_index, 3), (b"x" * 16, 1)]: with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of(self.imm_client.abort_upload(si, num, upload_secret)) + self.http.result_of_with_flush( + self.imm_client.abort_upload(si, num, upload_secret) + ) def test_unauthorized_abort(self): """ @@ -898,12 +948,12 @@ class ImmutableHTTPAPITests(SyncTestCase): # Failed to abort becaues wrong upload secret: with assert_fails_with_http_code(self, http.UNAUTHORIZED): - result_of( + self.http.result_of_with_flush( self.imm_client.abort_upload(storage_index, 1, upload_secret + b"X") ) # We can still write to it: - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 1, @@ -920,7 +970,7 @@ class ImmutableHTTPAPITests(SyncTestCase): """ uploaded_data = b"123" (upload_secret, _, storage_index, _) = self.create_upload({0}, 3) - result_of( + self.http.result_of_with_flush( self.imm_client.write_share_chunk( storage_index, 0, @@ -932,7 +982,9 @@ class ImmutableHTTPAPITests(SyncTestCase): # Can't abort, we finished upload: with assert_fails_with_http_code(self, http.NOT_ALLOWED): - result_of(self.imm_client.abort_upload(storage_index, 0, upload_secret)) + self.http.result_of_with_flush( + self.imm_client.abort_upload(storage_index, 0, upload_secret) + ) # Abort didn't prevent reading: self.assertEqual( @@ -954,7 +1006,7 @@ class ImmutableHTTPAPITests(SyncTestCase): storage_index = urandom(16) secret = b"A" * 32 with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of( + self.http.result_of_with_flush( self.general_client.add_or_renew_lease(storage_index, secret, secret) ) @@ -975,7 +1027,7 @@ class MutableHTTPAPIsTests(SyncTestCase): write_secret = urandom(32) lease_secret = urandom(32) storage_index = urandom(16) - result_of( + self.http.result_of_with_flush( self.mut_client.read_test_write_chunks( storage_index, write_secret, @@ -1013,7 +1065,7 @@ class MutableHTTPAPIsTests(SyncTestCase): def test_read_before_write(self): """In combo read/test/write operation, reads happen before writes.""" storage_index, write_secret, lease_secret = self.create_upload() - result = result_of( + result = self.http.result_of_with_flush( self.mut_client.read_test_write_chunks( storage_index, write_secret, @@ -1046,7 +1098,7 @@ class MutableHTTPAPIsTests(SyncTestCase): def test_conditional_write(self): """Uploads only happen if the test passes.""" storage_index, write_secret, lease_secret = self.create_upload() - result_failed = result_of( + result_failed = self.http.result_of_with_flush( self.mut_client.read_test_write_chunks( storage_index, write_secret, @@ -1064,7 +1116,7 @@ class MutableHTTPAPIsTests(SyncTestCase): self.assertFalse(result_failed.success) # This time the test matches: - result = result_of( + result = self.http.result_of_with_flush( self.mut_client.read_test_write_chunks( storage_index, write_secret, @@ -1090,19 +1142,22 @@ class MutableHTTPAPIsTests(SyncTestCase): def test_list_shares(self): """``list_shares()`` returns the shares for a given storage index.""" storage_index, _, _ = self.create_upload() - self.assertEqual(result_of(self.mut_client.list_shares(storage_index)), {0, 1}) + self.assertEqual( + self.http.result_of_with_flush(self.mut_client.list_shares(storage_index)), + {0, 1}, + ) def test_non_existent_list_shares(self): """A non-existent storage index errors when shares are listed.""" with self.assertRaises(ClientException) as exc: - result_of(self.mut_client.list_shares(urandom(32))) + self.http.result_of_with_flush(self.mut_client.list_shares(urandom(32))) self.assertEqual(exc.exception.code, http.NOT_FOUND) def test_wrong_write_enabler(self): """Writes with the wrong write enabler fail, and are not processed.""" storage_index, write_secret, lease_secret = self.create_upload() with self.assertRaises(ClientException) as exc: - result_of( + self.http.result_of_with_flush( self.mut_client.read_test_write_chunks( storage_index, urandom(32), @@ -1161,7 +1216,9 @@ class SharedImmutableMutableTestsMixin: storage_index, _, _ = self.upload(13) reason = "OHNO \u1235" - result_of(self.client.advise_corrupt_share(storage_index, 13, reason)) + self.http.result_of_with_flush( + self.client.advise_corrupt_share(storage_index, 13, reason) + ) self.assertEqual( corrupted, @@ -1174,11 +1231,15 @@ class SharedImmutableMutableTestsMixin: """ storage_index, _, _ = self.upload(13) reason = "OHNO \u1235" - result_of(self.client.advise_corrupt_share(storage_index, 13, reason)) + self.http.result_of_with_flush( + self.client.advise_corrupt_share(storage_index, 13, reason) + ) for (si, share_number) in [(storage_index, 11), (urandom(16), 13)]: with assert_fails_with_http_code(self, http.NOT_FOUND): - result_of(self.client.advise_corrupt_share(si, share_number, reason)) + self.http.result_of_with_flush( + self.client.advise_corrupt_share(si, share_number, reason) + ) def test_lease_renew_and_add(self): """ @@ -1196,7 +1257,7 @@ class SharedImmutableMutableTestsMixin: self.http.clock.advance(167) # We renew the lease: - result_of( + self.http.result_of_with_flush( self.general_client.add_or_renew_lease( storage_index, lease_secret, lease_secret ) @@ -1207,7 +1268,7 @@ class SharedImmutableMutableTestsMixin: # We create a new lease: lease_secret2 = urandom(32) - result_of( + self.http.result_of_with_flush( self.general_client.add_or_renew_lease( storage_index, lease_secret2, lease_secret2 ) @@ -1302,7 +1363,9 @@ class SharedImmutableMutableTestsMixin: ) ) self.assertEqual(response.code, http.OK) - self.assertEqual(result_of(response.content()), uploaded_data) + self.assertEqual( + self.http.result_of_with_flush(response.content()), uploaded_data + ) def test_validate_content_range_response_to_read(self): """ @@ -1354,7 +1417,7 @@ class ImmutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): upload_secret = urandom(32) lease_secret = urandom(32) storage_index = urandom(16) - result_of( + self.http.result_of_with_flush( self.client.create( storage_index, {share_number}, @@ -1364,7 +1427,7 @@ class ImmutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): lease_secret, ) ) - result_of( + self.http.result_of_with_flush( self.client.write_share_chunk( storage_index, share_number, @@ -1399,7 +1462,7 @@ class MutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): write_secret = urandom(32) lease_secret = urandom(32) storage_index = urandom(16) - result_of( + self.http.result_of_with_flush( self.client.read_test_write_chunks( storage_index, write_secret, From 520456bdc0411845715798ac72cd8a88686b798f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 29 Jun 2022 11:26:25 -0400 Subject: [PATCH 074/289] Add streaming to CBOR results. --- src/allmydata/storage/http_server.py | 45 ++++++++++++++++++---------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index a91b7963e..f354fd837 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -3,16 +3,16 @@ HTTP server for storage. """ from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any, Callable +from typing import Dict, List, Set, Tuple, Any, Callable from functools import wraps from base64 import b64decode import binascii +from tempfile import TemporaryFile from zope.interface import implementer from klein import Klein from twisted.web import http -from twisted.web.server import NOT_DONE_YET from twisted.internet.interfaces import ( IListeningPort, IStreamServerEndpoint, @@ -37,7 +37,7 @@ from cryptography.x509 import load_pem_x509_certificate # TODO Make sure to use pure Python versions? -from cbor2 import dumps, loads +from cbor2 import dump, loads from pycddl import Schema, ValidationError as CDDLValidationError from .server import StorageServer from .http_common import ( @@ -279,6 +279,10 @@ _SCHEMAS = { } +# Callabale that takes offset and length, returns the data at that range. +ReadData = Callable[[int, int], bytes] + + @implementer(IPullProducer) @define class _ReadAllProducer: @@ -288,10 +292,20 @@ class _ReadAllProducer: """ request: Request - read_data: Callable[[int, int], bytes] - result: Deferred + read_data: ReadData + result: Deferred = field(factory=Deferred) start: int = field(default=0) + @classmethod + def produce_to(cls, request: Request, read_data: ReadData) -> Deferred: + """ + Create and register the producer, returning ``Deferred`` that should be + returned from a HTTP server endpoint. + """ + producer = cls(request, read_data) + request.registerProducer(producer, False) + return producer.result + def resumeProducing(self): data = self.read_data(self.start, 65536) if not data: @@ -319,7 +333,7 @@ class _ReadRangeProducer: """ request: Request - read_data: Callable[[int, int], bytes] + read_data: ReadData result: Deferred start: int remaining: int @@ -356,7 +370,7 @@ class _ReadRangeProducer: pass -def read_range(request: Request, read_data: Callable[[int, int], bytes]) -> None: +def read_range(request: Request, read_data: ReadData) -> None: """ Read an optional ``Range`` header, reads data appropriately via the given callable, writes the data to the request. @@ -381,11 +395,7 @@ def read_range(request: Request, read_data: Callable[[int, int], bytes]) -> None return b"" if request.getHeader("range") is None: - d = Deferred() - request.registerProducer( - _ReadAllProducer(request, read_data_with_error_handling, d), False - ) - return d + return _ReadAllProducer.produce_to(request, read_data_with_error_handling) range_header = parse_range_header(request.getHeader("range")) if ( @@ -459,9 +469,14 @@ class HTTPServer(object): accept = parse_accept_header(accept_headers[0]) if accept.best == CBOR_MIME_TYPE: request.setHeader("Content-Type", CBOR_MIME_TYPE) - # TODO if data is big, maybe want to use a temporary file eventually... - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - return dumps(data) + f = TemporaryFile() + dump(data, f) + + def read_data(offset: int, length: int) -> bytes: + f.seek(offset) + return f.read(length) + + return _ReadAllProducer.produce_to(request, read_data) else: # TODO Might want to optionally send JSON someday: # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3861 From 0e8f2aa7024c75ba01943fb3f1fbce7160c8a799 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 29 Jun 2022 11:48:54 -0400 Subject: [PATCH 075/289] More memory usage reductions. --- src/allmydata/storage/http_server.py | 38 ++++++++++++++++--------- src/allmydata/test/test_storage_http.py | 9 ++++++ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index f354fd837..98bd419c1 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -245,6 +245,8 @@ class _HTTPError(Exception): # Tags are of the form #6.nnn, where the number is documented at # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml. Notably, #6.258 # indicates a set. +# +# TODO 3872 length limits in the schema. _SCHEMAS = { "allocate_buckets": Schema( """ @@ -485,12 +487,18 @@ class HTTPServer(object): def _read_encoded(self, request, schema: Schema) -> Any: """ Read encoded request body data, decoding it with CBOR by default. + + Somewhat arbitrarily, limit body size to 1MB; this may be too low, we + may want to customize per query type, but this is the starting point + for now. """ content_type = get_content_type(request.requestHeaders) if content_type == CBOR_MIME_TYPE: - # TODO limit memory usage, client could send arbitrarily large data... - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - message = request.content.read() + # Read 1 byte more than 1MB. We expect length to be 1MB or + # less; if it's more assume it's not a legitimate message. + message = request.content.read(1024 * 1024 + 1) + if len(message) > 1024 * 1024: + raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE) schema.validate_cbor(message) result = loads(message) return result @@ -586,20 +594,24 @@ class HTTPServer(object): request.setResponseCode(http.REQUESTED_RANGE_NOT_SATISFIABLE) return b"" - offset = content_range.start - - # TODO limit memory usage - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - data = request.content.read(content_range.stop - content_range.start + 1) bucket = self._uploads.get_write_bucket( storage_index, share_number, authorization[Secrets.UPLOAD] ) + offset = content_range.start + remaining = content_range.stop - content_range.start + finished = False - try: - finished = bucket.write(offset, data) - except ConflictingWriteError: - request.setResponseCode(http.CONFLICT) - return b"" + while remaining > 0: + data = request.content.read(min(remaining, 65536)) + assert data, "uploaded data length doesn't match range" + + try: + finished = bucket.write(offset, data) + except ConflictingWriteError: + request.setResponseCode(http.CONFLICT) + return b"" + remaining -= len(data) + offset += len(data) if finished: bucket.close() diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 1f860cca0..5418660c0 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -1139,6 +1139,15 @@ class MutableHTTPAPIsTests(SyncTestCase): b"aXYZef-0", ) + def test_too_large_write(self): + """ + Writing too large of a chunk results in a REQUEST ENTITY TOO LARGE http + error. + """ + with self.assertRaises(ClientException) as e: + self.create_upload(b"0123456789" * 1024 * 1024) + self.assertEqual(e.exception.code, http.REQUEST_ENTITY_TOO_LARGE) + def test_list_shares(self): """``list_shares()`` returns the shares for a given storage index.""" storage_index, _, _ = self.create_upload() From ab80c0f0a17affc87489cb29c031fb072803fb90 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 29 Jun 2022 14:04:42 -0400 Subject: [PATCH 076/289] Set some length limits on various queries lengths. --- src/allmydata/storage/http_server.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 98bd419c1..50e4ec946 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -246,12 +246,14 @@ class _HTTPError(Exception): # https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml. Notably, #6.258 # indicates a set. # -# TODO 3872 length limits in the schema. +# Somewhat arbitrary limits are set to reduce e.g. number of shares, number of +# vectors, etc.. These may need to be iterated on in future revisions of the +# code. _SCHEMAS = { "allocate_buckets": Schema( """ request = { - share-numbers: #6.258([* uint]) + share-numbers: #6.258([*30 uint]) allocated-size: uint } """ @@ -267,13 +269,15 @@ _SCHEMAS = { """ request = { "test-write-vectors": { - * share_number: { - "test": [* {"offset": uint, "size": uint, "specimen": bstr}] - "write": [* {"offset": uint, "data": bstr}] + ; TODO Add length limit here, after + ; https://github.com/anweiss/cddl/issues/128 is fixed + * share_number => { + "test": [*30 {"offset": uint, "size": uint, "specimen": bstr}] + "write": [*30 {"offset": uint, "data": bstr}] "new-length": uint / null } } - "read-vector": [* {"offset": uint, "size": uint}] + "read-vector": [*30 {"offset": uint, "size": uint}] } share_number = uint """ From bee46fae93494206c843633592ac04cbd65849b5 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 13:48:33 -0400 Subject: [PATCH 077/289] Resource limits on the client side. --- src/allmydata/storage/http_client.py | 34 ++++++++++++++++++--- src/allmydata/test/test_storage_http.py | 40 +++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 9203d02ab..b8bd0bf20 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -6,6 +6,7 @@ from __future__ import annotations from typing import Union, Optional, Sequence, Mapping from base64 import b64encode +from io import BytesIO from attrs import define, asdict, frozen, field @@ -114,6 +115,33 @@ _SCHEMAS = { } +@define +class _LengthLimitedCollector: + """ + Collect data using ``treq.collect()``, with limited length. + """ + + remaining_length: int + f: BytesIO = field(factory=BytesIO) + + def __call__(self, data: bytes): + if len(data) > self.remaining_length: + raise ValueError("Response length was too long") + self.f.write(data) + + +def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred: + """ + Like ``treq.content()``, but limit data read from the response to a set + length. If the response is longer than the max allowed length, the result + fails with a ``ValueError``. + """ + collector = _LengthLimitedCollector(max_length) + d = treq.collect(response, collector) + d.addCallback(lambda _: collector.f.getvalue()) + return d + + def _decode_cbor(response, schema: Schema): """Given HTTP response, return decoded CBOR body.""" @@ -124,9 +152,7 @@ def _decode_cbor(response, schema: Schema): if response.code > 199 and response.code < 300: content_type = get_content_type(response.headers) if content_type == CBOR_MIME_TYPE: - # TODO limit memory usage - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3872 - return treq.content(response).addCallback(got_content) + return limited_content(response).addCallback(got_content) else: raise ClientException(-1, "Server didn't send CBOR") else: @@ -295,7 +321,7 @@ class StorageClient(object): write_enabler_secret=None, headers=None, message_to_serialize=None, - **kwargs + **kwargs, ): """ Like ``treq.request()``, but with optional secrets that get translated diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 5418660c0..915cd33f2 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -65,6 +65,7 @@ from ..storage.http_client import ( ReadVector, ReadTestWriteResult, TestVector, + limited_content, ) @@ -255,6 +256,11 @@ class TestApp(object): request.setHeader("content-type", CBOR_MIME_TYPE) return dumps({"garbage": 123}) + @_authorized_route(_app, set(), "/millionbytes", methods=["GET"]) + def million_bytes(self, request, authorization): + """Return 1,000,000 bytes.""" + return b"0123456789" * 100_000 + def result_of(d): """ @@ -320,6 +326,40 @@ class CustomHTTPServerTests(SyncTestCase): with self.assertRaises(CDDLValidationError): result_of(client.get_version()) + def test_limited_content_fits(self): + """ + ``http_client.limited_content()`` returns the body if it is less than + the max length. + """ + for at_least_length in (1_000_000, 1_000_001): + response = result_of( + self.client.request( + "GET", + "http://127.0.0.1/millionbytes", + ) + ) + + self.assertEqual( + result_of(limited_content(response, at_least_length)), + b"0123456789" * 100_000, + ) + + def test_limited_content_does_not_fit(self): + """ + If the body is longer than than max length, + ``http_client.limited_content()`` fails with a ``ValueError``. + """ + for too_short in (999_999, 10): + response = result_of( + self.client.request( + "GET", + "http://127.0.0.1/millionbytes", + ) + ) + + with self.assertRaises(ValueError): + result_of(limited_content(response, too_short)) + class HttpTestFixture(Fixture): """ From 451e68795cf5cfb02fabdf6baa870289b978a8f7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 13:54:58 -0400 Subject: [PATCH 078/289] Lints, better explanation. --- src/allmydata/test/test_storage_http.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 915cd33f2..3108ffae8 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -22,7 +22,6 @@ from base64 import b64encode from contextlib import contextmanager from os import urandom from typing import Union, Callable, Tuple, Iterable -from time import sleep, time from cbor2 import dumps from pycddl import ValidationError as CDDLValidationError from hypothesis import assume, given, strategies as st @@ -32,7 +31,6 @@ from klein import Klein from hyperlink import DecodedURL from collections_extended import RangeMap from twisted.internet.task import Clock, Cooperator -from twisted.internet import task from twisted.web import http from twisted.web.http_headers import Headers from werkzeug import routing @@ -370,6 +368,10 @@ class HttpTestFixture(Fixture): def _setUp(self): self.clock = Clock() self.tempdir = self.useFixture(TempDir()) + # The global Cooperator used by Twisted (a) used by pull producers in + # twisted.web, (b) is driven by a real reactor. We want to push time + # forward ourselves since we rely on pull producers in the HTTP storage + # server. self.mock = self.useFixture( MockPatch( "twisted.internet.task._theCooperator", From 03c515191edc519ad045191f18c5d558d4a19e35 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 14:21:21 -0400 Subject: [PATCH 079/289] Better docs. --- src/allmydata/protocol_switch.py | 40 +++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 9b4e30671..20984c615 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -1,8 +1,15 @@ """ -Support for listening with both HTTP and Foolscap on the same port. -""" +Support for listening with both HTTPS and Foolscap on the same port. -from typing import Optional +The goal is to make the transition from Foolscap to HTTPS-based protocols as +simple as possible, with no extra configuration needed. Listening on the same +port means a user upgrading Tahoe-LAFS will automatically get HTTPS working +with no additional changes. + +Use ``create_foolscap_or_http_class()`` to create a new subclass per ``Tub``, +and then ``update_foolscap_or_http_class()`` to add the relevant information to +the subclass once it becomes available later in the configuration process. +""" from twisted.internet.protocol import Protocol from twisted.internet.interfaces import IDelayedCall @@ -17,18 +24,26 @@ from .storage.http_server import HTTPServer from .storage.server import StorageServer -class PretendToBeNegotiation(type): - """😱""" +class _PretendToBeNegotiation(type): + """ + Metaclass that allows ``_FoolscapOrHttps`` to pretend to be a ``Negotiation`` + instance, since Foolscap has some ``assert isinstance(protocol, + Negotiation`` checks. + """ def __instancecheck__(self, instance): return (instance.__class__ == self) or isinstance(instance, Negotiation) -class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): +class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): """ Based on initial query, decide whether we're talking Foolscap or HTTP. - Pretends to be a ``foolscap.negotiate.Negotiation`` instance. + Additionally, pretends to be a ``foolscap.negotiate.Negotiation`` instance, + since these are created by Foolscap's ``Tub``, by setting this to be the + tub's ``negotiationClass``. + + Do not use directly; this needs to be subclassed per ``Tub``. """ # These three will be set by a subclass in update_foolscap_or_http_class() @@ -110,13 +125,22 @@ class FoolscapOrHttp(Protocol, metaclass=PretendToBeNegotiation): def create_foolscap_or_http_class(): - class FoolscapOrHttpWithCert(FoolscapOrHttp): + """ + Create a new Foolscap-or-HTTPS protocol class for a specific ``Tub`` + instance. + """ + + class FoolscapOrHttpWithCert(_FoolscapOrHttps): pass return FoolscapOrHttpWithCert def update_foolscap_or_http_class(cls, certificate, storage_server, swissnum): + """ + Add the various parameters needed by a ``Tub``-specific + ``_FoolscapOrHttps`` subclass. + """ cls.certificate = certificate cls.storage_server = storage_server cls.swissnum = swissnum From d1bdce9682f9c6c8eefd1488db7c8c8bfc7cdf6b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 14:26:36 -0400 Subject: [PATCH 080/289] A nicer API. --- src/allmydata/client.py | 5 +++-- src/allmydata/node.py | 4 ++-- src/allmydata/protocol_switch.py | 32 +++++++++++++++++--------------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 294684b58..2f68c1cb4 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -64,7 +64,6 @@ from allmydata.interfaces import ( from allmydata.nodemaker import NodeMaker from allmydata.blacklist import Blacklist from allmydata import node -from .protocol_switch import update_foolscap_or_http_class KiB=1024 @@ -820,7 +819,9 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - update_foolscap_or_http_class(self.tub.negotiationClass, self.tub.myCertificate, ss, swissnum.encode("ascii")) + self.tub.negotiationClass.add_storage_server( + self.tub.myCertificate, ss, swissnum.encode("ascii") + ) announcement["anonymous-storage-FURL"] = furl diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 93fa6a8e1..597221e9b 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -55,7 +55,7 @@ from allmydata.util.yamlutil import ( from . import ( __full_version__, ) -from .protocol_switch import create_foolscap_or_http_class +from .protocol_switch import support_foolscap_and_https def _common_valid_config(): @@ -709,7 +709,7 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han the new Tub via `Tub.setOption` """ tub = Tub(**kwargs) - tub.negotiationClass = create_foolscap_or_http_class() + support_foolscap_and_https(tub) for (name, value) in list(tub_options.items()): tub.setOption(name, value) handlers = default_connection_handlers.copy() diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 20984c615..7623d68e5 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -6,9 +6,10 @@ simple as possible, with no extra configuration needed. Listening on the same port means a user upgrading Tahoe-LAFS will automatically get HTTPS working with no additional changes. -Use ``create_foolscap_or_http_class()`` to create a new subclass per ``Tub``, -and then ``update_foolscap_or_http_class()`` to add the relevant information to -the subclass once it becomes available later in the configuration process. +Use ``support_foolscap_and_https()`` to create a new subclass for a ``Tub`` +instance, and then ``add_storage_server()`` on the resulting class to add the +relevant information for a storage server once it becomes available later in +the configuration process. """ from twisted.internet.protocol import Protocol @@ -19,6 +20,7 @@ from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.internet import reactor from foolscap.negotiate import Negotiation +from foolscap.api import Tub from .storage.http_server import HTTPServer from .storage.server import StorageServer @@ -54,6 +56,16 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): _timeout: IDelayedCall + @classmethod + def add_storage_server(cls, certificate, storage_server, swissnum): + """ + Add the various parameters needed by a ``Tub``-specific + ``_FoolscapOrHttps`` subclass. + """ + cls.certificate = certificate + cls.storage_server = storage_server + cls.swissnum = swissnum + def __init__(self, *args, **kwargs): self._foolscap: Negotiation = Negotiation(*args, **kwargs) self._buffer: bytes = b"" @@ -124,7 +136,7 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): self.__dict__ = protocol.__dict__ -def create_foolscap_or_http_class(): +def support_foolscap_and_https(tub: Tub): """ Create a new Foolscap-or-HTTPS protocol class for a specific ``Tub`` instance. @@ -133,14 +145,4 @@ def create_foolscap_or_http_class(): class FoolscapOrHttpWithCert(_FoolscapOrHttps): pass - return FoolscapOrHttpWithCert - - -def update_foolscap_or_http_class(cls, certificate, storage_server, swissnum): - """ - Add the various parameters needed by a ``Tub``-specific - ``_FoolscapOrHttps`` subclass. - """ - cls.certificate = certificate - cls.storage_server = storage_server - cls.swissnum = swissnum + tub.negotiationClass = FoolscapOrHttpWithCert # type: ignore From 03d9ff395cce0aeff1b1e08b80c8c073907cd3ad Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 14:30:19 -0400 Subject: [PATCH 081/289] News file. --- newsfragments/3902.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3902.feature diff --git a/newsfragments/3902.feature b/newsfragments/3902.feature new file mode 100644 index 000000000..2477d0ae6 --- /dev/null +++ b/newsfragments/3902.feature @@ -0,0 +1 @@ +The new HTTPS-based storage server is now enabled transparently on the same port as the Foolscap server. This will not have any user-facing impact until the HTTPS storage protocol is supported in clients as well. \ No newline at end of file From 1798966f03c652396d434e14fb41e76607015cc4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 14:52:12 -0400 Subject: [PATCH 082/289] Store the tub on the subclass, since we'll want it (or rather its Listeners) for NURL construction. --- src/allmydata/client.py | 4 +--- src/allmydata/protocol_switch.py | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 2f68c1cb4..e737f93e6 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -819,9 +819,7 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - self.tub.negotiationClass.add_storage_server( - self.tub.myCertificate, ss, swissnum.encode("ascii") - ) + self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) announcement["anonymous-storage-FURL"] = furl diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 7623d68e5..059339575 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -48,21 +48,25 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): Do not use directly; this needs to be subclassed per ``Tub``. """ - # These three will be set by a subclass in update_foolscap_or_http_class() - # below. + # These will be set by support_foolscap_and_https() and add_storage_server(). + + # The swissnum for the storage_server. swissnum: bytes - certificate: PrivateCertificate + # The storage server we're exposing. storage_server: StorageServer + # The tub that created us: + tub: Tub + # The certificate for the endpoint: + certificate: PrivateCertificate _timeout: IDelayedCall @classmethod - def add_storage_server(cls, certificate, storage_server, swissnum): + def add_storage_server(cls, storage_server, swissnum): """ - Add the various parameters needed by a ``Tub``-specific - ``_FoolscapOrHttps`` subclass. + Add the various storage server-related attributes needed by a + ``Tub``-specific ``_FoolscapOrHttps`` subclass. """ - cls.certificate = certificate cls.storage_server = storage_server cls.swissnum = swissnum @@ -141,8 +145,10 @@ def support_foolscap_and_https(tub: Tub): Create a new Foolscap-or-HTTPS protocol class for a specific ``Tub`` instance. """ + the_tub = tub class FoolscapOrHttpWithCert(_FoolscapOrHttps): - pass + tub = the_tub + certificate = tub.myCertificate tub.negotiationClass = FoolscapOrHttpWithCert # type: ignore From 3db6080f6d62907e9eeaa8de5b6cd1a480b96e23 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 15:18:22 -0400 Subject: [PATCH 083/289] Make the factories a class-level attribute. --- src/allmydata/protocol_switch.py | 45 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 059339575..21d896793 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -14,7 +14,7 @@ the configuration process. from twisted.internet.protocol import Protocol from twisted.internet.interfaces import IDelayedCall -from twisted.internet.ssl import CertificateOptions, PrivateCertificate +from twisted.internet.ssl import CertificateOptions from twisted.web.server import Site from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.internet import reactor @@ -50,25 +50,35 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): # These will be set by support_foolscap_and_https() and add_storage_server(). - # The swissnum for the storage_server. - swissnum: bytes - # The storage server we're exposing. - storage_server: StorageServer + # The HTTP storage server API we're exposing. + http_storage_server: HTTPServer + # The Twisted HTTPS protocol factory wrapping the storage server API: + https_factory: TLSMemoryBIOFactory # The tub that created us: tub: Tub - # The certificate for the endpoint: - certificate: PrivateCertificate + # This will be created by the instance in connectionMade(): _timeout: IDelayedCall @classmethod - def add_storage_server(cls, storage_server, swissnum): + def add_storage_server(cls, storage_server: StorageServer, swissnum): """ Add the various storage server-related attributes needed by a ``Tub``-specific ``_FoolscapOrHttps`` subclass. """ - cls.storage_server = storage_server - cls.swissnum = swissnum + # Tub.myCertificate is a twisted.internet.ssl.PrivateCertificate + # instance. + certificate_options = CertificateOptions( + privateKey=cls.tub.myCertificate.privateKey.original, + certificate=cls.tub.myCertificate.original, + ) + + cls.http_storage_server = HTTPServer(storage_server, swissnum) + cls.https_factory = TLSMemoryBIOFactory( + certificate_options, + False, + Site(cls.http_storage_server.get_resource()), + ) def __init__(self, *args, **kwargs): self._foolscap: Negotiation = Negotiation(*args, **kwargs) @@ -124,16 +134,8 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): return else: # We're a HTTPS protocol instance, serving the storage protocol: - certificate_options = CertificateOptions( - privateKey=self.certificate.privateKey.original, - certificate=self.certificate.original, - ) - http_server = HTTPServer(self.storage_server, self.swissnum) - factory = TLSMemoryBIOFactory( - certificate_options, False, Site(http_server.get_resource()) - ) assert self.transport is not None - protocol = factory.buildProtocol(self.transport.getPeer()) + protocol = self.https_factory.buildProtocol(self.transport.getPeer()) protocol.makeConnection(self.transport) protocol.dataReceived(self._buffer) self.__class__ = protocol.__class__ @@ -147,8 +149,7 @@ def support_foolscap_and_https(tub: Tub): """ the_tub = tub - class FoolscapOrHttpWithCert(_FoolscapOrHttps): + class FoolscapOrHttpForTub(_FoolscapOrHttps): tub = the_tub - certificate = tub.myCertificate - tub.negotiationClass = FoolscapOrHttpWithCert # type: ignore + tub.negotiationClass = FoolscapOrHttpForTub # type: ignore From 70dfc4484173bb9592d02834e14bb85d8356a14c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 15:45:30 -0400 Subject: [PATCH 084/289] Fix for 3905. --- src/allmydata/storage/http_server.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 06a6863fa..f61030844 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -188,7 +188,12 @@ class UploadsInProgress(object): def remove_write_bucket(self, bucket: BucketWriter): """Stop tracking the given ``BucketWriter``.""" - storage_index, share_number = self._bucketwriters.pop(bucket) + try: + storage_index, share_number = self._bucketwriters.pop(bucket) + except KeyError: + # This is probably a BucketWriter created by Foolscap, so just + # ignore it. + return uploads_index = self._uploads[storage_index] uploads_index.shares.pop(share_number) uploads_index.upload_secrets.pop(share_number) From f2acf71998475bb8b5eef981f3c3b93e23432561 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 30 Jun 2022 15:58:52 -0400 Subject: [PATCH 085/289] Document next steps: NURL generation. --- src/allmydata/protocol_switch.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 21d896793..9f33560e7 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -66,6 +66,14 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): Add the various storage server-related attributes needed by a ``Tub``-specific ``_FoolscapOrHttps`` subclass. """ + # TODO tub.locationHints will be in the format ["tcp:hostname:port"] + # (and maybe some other things we can ignore for now). We also have + # access to the certificate. Together, this should be sufficient to + # construct NURLs, one per hint. The code for NURls should be + # refactored out of http_server.py's build_nurl; that code might want + # to skip around for the future when we don't do foolscap, but for now + # this module will be main way we set up HTTPS. + # Tub.myCertificate is a twisted.internet.ssl.PrivateCertificate # instance. certificate_options = CertificateOptions( From 249f43184972d124d5144dccf52f3cb78662a523 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 5 Jul 2022 11:14:52 -0400 Subject: [PATCH 086/289] Use MonkeyPatch instead of MockPatch, since we're not mocking. --- src/allmydata/test/test_storage_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 3108ffae8..811cc2ac1 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -25,7 +25,7 @@ from typing import Union, Callable, Tuple, Iterable from cbor2 import dumps from pycddl import ValidationError as CDDLValidationError from hypothesis import assume, given, strategies as st -from fixtures import Fixture, TempDir, MockPatch +from fixtures import Fixture, TempDir, MonkeyPatch from treq.testing import StubTreq from klein import Klein from hyperlink import DecodedURL @@ -373,7 +373,7 @@ class HttpTestFixture(Fixture): # forward ourselves since we rely on pull producers in the HTTP storage # server. self.mock = self.useFixture( - MockPatch( + MonkeyPatch( "twisted.internet.task._theCooperator", Cooperator(scheduler=lambda c: self.clock.callLater(0.000001, c)), ) From 97d0ba23ebc48c3b5af378446b5adc3189b608a9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 5 Jul 2022 11:21:46 -0400 Subject: [PATCH 087/289] Switch to hypothesis-based test. --- src/allmydata/test/test_storage_http.py | 31 ++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 811cc2ac1..5c429af88 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -235,6 +235,13 @@ class RouteConverterTests(SyncTestCase): SWISSNUM_FOR_TEST = b"abcd" +def gen_bytes(length: int) -> bytes: + """Generate bytes to the given length.""" + result = (b"0123456789abcdef" * ((length // 16) + 1))[:length] + assert len(result) == length + return result + + class TestApp(object): """HTTP API for testing purposes.""" @@ -254,10 +261,10 @@ class TestApp(object): request.setHeader("content-type", CBOR_MIME_TYPE) return dumps({"garbage": 123}) - @_authorized_route(_app, set(), "/millionbytes", methods=["GET"]) - def million_bytes(self, request, authorization): - """Return 1,000,000 bytes.""" - return b"0123456789" * 100_000 + @_authorized_route(_app, set(), "/bytes/", methods=["GET"]) + def generate_bytes(self, request, authorization, length): + """Return bytes to the given length using ``gen_bytes()``.""" + return gen_bytes(length) def result_of(d): @@ -324,34 +331,36 @@ class CustomHTTPServerTests(SyncTestCase): with self.assertRaises(CDDLValidationError): result_of(client.get_version()) - def test_limited_content_fits(self): + @given(length=st.integers(min_value=1, max_value=1_000_000)) + def test_limited_content_fits(self, length): """ ``http_client.limited_content()`` returns the body if it is less than the max length. """ - for at_least_length in (1_000_000, 1_000_001): + for at_least_length in (length, length + 1, length + 1000): response = result_of( self.client.request( "GET", - "http://127.0.0.1/millionbytes", + f"http://127.0.0.1/bytes/{length}", ) ) self.assertEqual( result_of(limited_content(response, at_least_length)), - b"0123456789" * 100_000, + gen_bytes(length), ) - def test_limited_content_does_not_fit(self): + @given(length=st.integers(min_value=10, max_value=1_000_000)) + def test_limited_content_does_not_fit(self, length): """ If the body is longer than than max length, ``http_client.limited_content()`` fails with a ``ValueError``. """ - for too_short in (999_999, 10): + for too_short in (length - 1, 5): response = result_of( self.client.request( "GET", - "http://127.0.0.1/millionbytes", + f"http://127.0.0.1/bytes/{length}", ) ) From 1e6864ac0116b834be34ae556b80a7ca52f07e28 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 5 Jul 2022 11:30:01 -0400 Subject: [PATCH 088/289] Typo. --- src/allmydata/storage/http_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 50e4ec946..ffba354bb 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -285,7 +285,7 @@ _SCHEMAS = { } -# Callabale that takes offset and length, returns the data at that range. +# Callable that takes offset and length, returns the data at that range. ReadData = Callable[[int, int], bytes] From 3270d24c45d1613b5418f6f189517b859b4afdaa Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 5 Jul 2022 11:30:48 -0400 Subject: [PATCH 089/289] Slight simplification. --- src/allmydata/storage/http_server.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index ffba354bb..c727b5e95 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -24,7 +24,7 @@ from twisted.web.server import Site, Request from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.python.filepath import FilePath -from attrs import define, field +from attrs import define, field, Factory from werkzeug.http import ( parse_range_header, parse_content_range_header, @@ -149,11 +149,11 @@ class StorageIndexUploads(object): """ # Map share number to BucketWriter - shares: dict[int, BucketWriter] = field(factory=dict) + shares: dict[int, BucketWriter] = Factory(dict) # Map share number to the upload secret (different shares might have # different upload secrets). - upload_secrets: dict[int, bytes] = field(factory=dict) + upload_secrets: dict[int, bytes] = Factory(dict) @define @@ -163,10 +163,10 @@ class UploadsInProgress(object): """ # Map storage index to corresponding uploads-in-progress - _uploads: dict[bytes, StorageIndexUploads] = field(factory=dict) + _uploads: dict[bytes, StorageIndexUploads] = Factory(dict) # Map BucketWriter to (storage index, share number) - _bucketwriters: dict[BucketWriter, Tuple[bytes, int]] = field(factory=dict) + _bucketwriters: dict[BucketWriter, Tuple[bytes, int]] = Factory(dict) def add_write_bucket( self, @@ -299,7 +299,7 @@ class _ReadAllProducer: request: Request read_data: ReadData - result: Deferred = field(factory=Deferred) + result: Deferred = Factory(Deferred) start: int = field(default=0) @classmethod From 6e3ca256b9eaf4240a782abfcde887d360b10f10 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 5 Jul 2022 15:36:21 -0400 Subject: [PATCH 090/289] Some refactoring to handle edge cases better, in progress. --- src/allmydata/storage/http_server.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index c727b5e95..d55d12711 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -349,26 +349,35 @@ class _ReadRangeProducer: to_read = min(self.remaining, 65536) data = self.read_data(self.start, to_read) assert len(data) <= to_read - if self.first_read and data: + + if self.first_read and self.remaining > 0: # For empty bodies the content-range header makes no sense since # the end of the range is inclusive. self.request.setHeader( "content-range", - ContentRange("bytes", self.start, self.start + len(data)).to_header(), + ContentRange( + "bytes", self.start, self.start + self.remaining + ).to_header(), ) + self.first_read = False + + if not data and self.remaining > 0: + # Either data is missing locally (storage issue?) or a bug + pass # TODO abort. TODO test + + self.start += len(data) + self.remaining -= len(data) + assert self.remaining >= 0 + self.request.write(data) - if not data or len(data) < to_read: + if self.remaining == 0: self.request.unregisterProducer() d = self.result del self.result d.callback(b"") return - self.start += len(data) - self.remaining -= len(data) - assert self.remaining >= 0 - def pauseProducing(self): pass @@ -412,6 +421,8 @@ def read_range(request: Request, read_data: ReadData) -> None: ): raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE) + # TODO if end is beyond the end of the share, either return error, or maybe + # just return what we can... offset, end = range_header.ranges[0] request.setResponseCode(http.PARTIAL_CONTENT) d = Deferred() From 69c4dbf2b5e04cb3dd9e79ea9b98686178d777c4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 5 Jul 2022 17:17:38 -0400 Subject: [PATCH 091/289] Fix tests and point to future work. --- src/allmydata/storage/http_server.py | 15 ++++++++++++--- src/allmydata/test/test_storage_http.py | 4 +++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index d55d12711..9d90ba960 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -353,6 +353,11 @@ class _ReadRangeProducer: if self.first_read and self.remaining > 0: # For empty bodies the content-range header makes no sense since # the end of the range is inclusive. + # + # TODO this is wrong for requests that go beyond the end of the + # share. This will be fixed in + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 by making that + # edge case not happen. self.request.setHeader( "content-range", ContentRange( @@ -362,8 +367,11 @@ class _ReadRangeProducer: self.first_read = False if not data and self.remaining > 0: - # Either data is missing locally (storage issue?) or a bug - pass # TODO abort. TODO test + # TODO Either data is missing locally (storage issue?) or a bug, + # abort response with error? Until + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 is implemented + # we continue anyway. + pass self.start += len(data) self.remaining -= len(data) @@ -371,7 +379,8 @@ class _ReadRangeProducer: self.request.write(data) - if self.remaining == 0: + # TODO remove the second clause in https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 + if self.remaining == 0 or not data: self.request.unregisterProducer() d = self.result del self.result diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 5c429af88..4e44a9f96 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -1451,8 +1451,10 @@ class SharedImmutableMutableTestsMixin: ) check_range("bytes=0-10", "bytes 0-10/*") + check_range("bytes=3-17", "bytes 3-17/*") + # TODO re-enable in https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 # Can't go beyond the end of the mutable/immutable! - check_range("bytes=10-100", "bytes 10-25/*") + #check_range("bytes=10-100", "bytes 10-25/*") class ImmutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): From 5c5556d91505b659ce44e33c31e2ef82d4b079d1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 6 Jul 2022 09:38:31 -0400 Subject: [PATCH 092/289] More robust usage. --- src/allmydata/storage/http_client.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index b8bd0bf20..0ccc3c4a1 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -18,7 +18,7 @@ from werkzeug.datastructures import Range, ContentRange from twisted.web.http_headers import Headers from twisted.web import http from twisted.web.iweb import IPolicyForHTTPS -from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred +from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred, succeed from twisted.internet.interfaces import IOpenSSLClientConnectionCreator from twisted.internet.ssl import CertificateOptions from twisted.web.client import Agent, HTTPConnectionPool @@ -137,7 +137,10 @@ def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred: fails with a ``ValueError``. """ collector = _LengthLimitedCollector(max_length) - d = treq.collect(response, collector) + # Make really sure everything gets called in Deferred context, treq might + # call collector directly... + d = succeed(None) + d.addCallback(lambda _: treq.collect(response, collector)) d.addCallback(lambda _: collector.f.getvalue()) return d From dac0080ea26cbbe83dfaaf06a777e7b5a554fa63 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 6 Jul 2022 09:40:46 -0400 Subject: [PATCH 093/289] Make sure we update remaining length, and update test to catch the edge case this fixes. --- src/allmydata/storage/http_client.py | 3 ++- src/allmydata/test/test_storage_http.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 0ccc3c4a1..daadebb28 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -125,7 +125,8 @@ class _LengthLimitedCollector: f: BytesIO = field(factory=BytesIO) def __call__(self, data: bytes): - if len(data) > self.remaining_length: + self.remaining_length -= len(data) + if self.remaining_length < 0: raise ValueError("Response length was too long") self.f.write(data) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 4e44a9f96..533771866 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -337,7 +337,7 @@ class CustomHTTPServerTests(SyncTestCase): ``http_client.limited_content()`` returns the body if it is less than the max length. """ - for at_least_length in (length, length + 1, length + 1000): + for at_least_length in (length, length + 1, length + 1000, length + 100_000): response = result_of( self.client.request( "GET", From fd8a385d1d70a52ecf26eade6c9f3933d73fef79 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 6 Jul 2022 09:46:59 -0400 Subject: [PATCH 094/289] Reformat with black. --- src/allmydata/test/test_storage_http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 533771866..885750441 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -1454,7 +1454,7 @@ class SharedImmutableMutableTestsMixin: check_range("bytes=3-17", "bytes 3-17/*") # TODO re-enable in https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 # Can't go beyond the end of the mutable/immutable! - #check_range("bytes=10-100", "bytes 10-25/*") + # check_range("bytes=10-100", "bytes 10-25/*") class ImmutableSharedTests(SharedImmutableMutableTestsMixin, SyncTestCase): From 0b5132745ddfd8c2b14f66359535dc8e3c7a1eab Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 6 Jul 2022 09:47:08 -0400 Subject: [PATCH 095/289] A nicer interface. --- src/allmydata/storage/http_client.py | 18 ++++++++++++++---- src/allmydata/test/test_storage_http.py | 2 +- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index daadebb28..b8ba1641a 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -4,7 +4,7 @@ HTTP client that talks to the HTTP storage server. from __future__ import annotations -from typing import Union, Optional, Sequence, Mapping +from typing import Union, Optional, Sequence, Mapping, BinaryIO from base64 import b64encode from io import BytesIO @@ -131,25 +131,35 @@ class _LengthLimitedCollector: self.f.write(data) -def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred: +def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred[BinaryIO]: """ Like ``treq.content()``, but limit data read from the response to a set length. If the response is longer than the max allowed length, the result fails with a ``ValueError``. + + A potentially useful future improvement would be using a temporary file to + store the content; since filesystem buffering means that would use memory + for small responses and disk for large responses. """ collector = _LengthLimitedCollector(max_length) # Make really sure everything gets called in Deferred context, treq might # call collector directly... d = succeed(None) d.addCallback(lambda _: treq.collect(response, collector)) - d.addCallback(lambda _: collector.f.getvalue()) + + def done(_): + collector.f.seek(0) + return collector.f + + d.addCallback(done) return d def _decode_cbor(response, schema: Schema): """Given HTTP response, return decoded CBOR body.""" - def got_content(data): + def got_content(f: BinaryIO): + data = f.read() schema.validate_cbor(data) return loads(data) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 885750441..419052282 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -346,7 +346,7 @@ class CustomHTTPServerTests(SyncTestCase): ) self.assertEqual( - result_of(limited_content(response, at_least_length)), + result_of(limited_content(response, at_least_length)).read(), gen_bytes(length), ) From 87932e3444267a50c4a00700d356fda4057a9b14 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 6 Jul 2022 09:50:16 -0400 Subject: [PATCH 096/289] Correct type. --- src/allmydata/storage/http_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 9d90ba960..c53906218 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -4,7 +4,7 @@ HTTP server for storage. from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any, Callable +from typing import Dict, List, Set, Tuple, Any, Callable, Union from functools import wraps from base64 import b64decode import binascii @@ -394,7 +394,7 @@ class _ReadRangeProducer: pass -def read_range(request: Request, read_data: ReadData) -> None: +def read_range(request: Request, read_data: ReadData) -> Union[Deferred, bytes]: """ Read an optional ``Range`` header, reads data appropriately via the given callable, writes the data to the request. From a24aefaebf8f0487b4a8cc981c7cb238d0aca1d2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 15 Jul 2022 11:35:28 -0400 Subject: [PATCH 097/289] There can be up to 256 shares. --- src/allmydata/storage/http_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index c53906218..a29742bab 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -253,7 +253,7 @@ _SCHEMAS = { "allocate_buckets": Schema( """ request = { - share-numbers: #6.258([*30 uint]) + share-numbers: #6.258([*256 uint]) allocated-size: uint } """ From 49dfc8445cec28d6d903d0a15ee69c411b1b70a1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 18 Jul 2022 14:12:12 -0400 Subject: [PATCH 098/289] Implementation of getting length of shares (albeit inefficiently for now). --- src/allmydata/storage/immutable.py | 8 ++++++++ src/allmydata/storage/mutable.py | 10 +++++----- src/allmydata/storage/server.py | 10 ++++++++++ src/allmydata/test/test_storage.py | 22 ++++++++++++++++++++++ 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 920bd3c5e..2c65304b8 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -199,8 +199,13 @@ class ShareFile(object): raise UnknownImmutableContainerVersionError(filename, version) self._num_leases = num_leases self._lease_offset = filesize - (num_leases * self.LEASE_SIZE) + self._length = filesize - 0xc - (num_leases * self.LEASE_SIZE) + self._data_offset = 0xc + def get_length(self): + return self._length + def unlink(self): os.unlink(self.home) @@ -544,6 +549,9 @@ class BucketReader(object): self.shnum, reason) + def get_length(self): + return self._share_file.get_length() + @implementer(RIBucketReader) class FoolscapBucketReader(Referenceable): # type: ignore # warner/foolscap#78 diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index bd59d96b8..9a99979e9 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -412,11 +412,11 @@ class MutableShareFile(object): datav.append(self._read_share_data(f, offset, length)) return datav -# def remote_get_length(self): -# f = open(self.home, 'rb') -# data_length = self._read_data_length(f) -# f.close() -# return data_length + def get_length(self): + f = open(self.home, 'rb') + data_length = self._read_data_length(f) + f.close() + return data_length def check_write_enabler(self, write_enabler, si_s): with open(self.home, 'rb+') as f: diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 0a1999dfb..f452885d0 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -794,6 +794,16 @@ class StorageServer(service.MultiService): return None + def get_immutable_share_length(self, storage_index: bytes, share_number: int) -> int: + """Returns the length (in bytes) of an immutable.""" + return self.get_buckets(storage_index)[share_number].get_length() + + def get_mutable_share_length(self, storage_index: bytes, share_number: int) -> int: + """Returns the length (in bytes) of a mutable.""" + return MutableShareFile( + dict(self.get_shares(storage_index))[share_number] + ).get_length() + @implementer(RIStorageServer) class FoolscapStorageServer(Referenceable): # type: ignore # warner/foolscap#78 diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 91d55790e..bb8d48d2f 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -688,6 +688,15 @@ class Server(unittest.TestCase): writer.abort() self.failUnlessEqual(ss.allocated_size(), 0) + def test_immutable_length(self): + """``get_immutable_share_length()`` returns the length of an immutable share.""" + ss = self.create("test_immutable_length") + _, writers = self.allocate(ss, b"allocate", [22], 75) + bucket = writers[22] + bucket.write(0, b"X" * 75) + bucket.close() + self.assertEqual(ss.get_immutable_share_length(b"allocate", 22), 75) + def test_allocate(self): ss = self.create("test_allocate") @@ -1340,6 +1349,19 @@ class MutableServer(unittest.TestCase): (set(), {0, 1, 2, 4}, {0, 1, 4}) ) + def test_mutable_share_length(self): + """``get_mutable_share_length()`` returns the length of the share.""" + ss = self.create("test_mutable_share_length") + self.allocate(ss, b"si1", b"we1", b"le1", [16], 23) + ss.slot_testv_and_readv_and_writev( + b"si1", (self.write_enabler(b"we1"), + self.renew_secret(b"le1"), + self.cancel_secret(b"le1")), + {16: ([], [(0, b"x" * 23)], None)}, + [] + ) + self.assertEqual(ss.get_mutable_share_length(b"si1", 16), 23) + def test_bad_magic(self): ss = self.create("test_bad_magic") self.allocate(ss, b"si1", b"we1", next(self._lease_secret), set([0]), 10) From b3aff5c43b73718de0ecffa6c39d5efac2f6d336 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 19 Jul 2022 14:37:46 -0400 Subject: [PATCH 099/289] More efficient implementations. --- src/allmydata/storage/server.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index f452885d0..1b9b051eb 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -796,13 +796,16 @@ class StorageServer(service.MultiService): def get_immutable_share_length(self, storage_index: bytes, share_number: int) -> int: """Returns the length (in bytes) of an immutable.""" - return self.get_buckets(storage_index)[share_number].get_length() + si_dir = storage_index_to_dir(storage_index) + path = os.path.join(self.sharedir, si_dir, str(share_number)) + bucket = BucketReader(self, path, storage_index, share_number) + return bucket.get_length() def get_mutable_share_length(self, storage_index: bytes, share_number: int) -> int: """Returns the length (in bytes) of a mutable.""" - return MutableShareFile( - dict(self.get_shares(storage_index))[share_number] - ).get_length() + si_dir = storage_index_to_dir(storage_index) + path = os.path.join(self.sharedir, si_dir, str(share_number)) + return MutableShareFile(path).get_length() @implementer(RIStorageServer) From 1b8b71b3068e73486d406f6c79fbbdbf8ecaf261 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 19 Jul 2022 16:10:22 -0400 Subject: [PATCH 100/289] Content-Range headers are now checked (somewhat) and the server now sends correct headers when reading beyond the end. --- src/allmydata/storage/http_client.py | 29 ++++++++++++++++++++++-- src/allmydata/storage/http_server.py | 34 ++++++++++++++++++---------- src/allmydata/storage/server.py | 2 ++ 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index b8ba1641a..11c9ab2fc 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -7,6 +7,7 @@ from __future__ import annotations from typing import Union, Optional, Sequence, Mapping, BinaryIO from base64 import b64encode from io import BytesIO +from os import SEEK_END from attrs import define, asdict, frozen, field @@ -29,6 +30,7 @@ from treq.client import HTTPClient from treq.testing import StubTreq from OpenSSL import SSL from cryptography.hazmat.bindings.openssl.binding import Binding +from werkzeug.http import parse_content_range_header from .http_common import ( swissnum_auth_header, @@ -461,13 +463,36 @@ def read_share_chunk( "GET", url, headers=Headers( + # Ranges in HTTP are _inclusive_, Python's convention is exclusive, + # but Range constructor does that the conversion for us. {"range": [Range("bytes", [(offset, offset + length)]).to_header()]} ), ) if response.code == http.PARTIAL_CONTENT: - body = yield response.content() - returnValue(body) + content_range = parse_content_range_header( + response.headers.getRawHeaders("content-range")[0] + ) + supposed_length = content_range.stop - content_range.start + if supposed_length > length: + raise ValueError("Server sent more than we asked for?!") + # It might also send less than we asked for. That's (probably) OK, e.g. + # if we went past the end of the file. + body = yield limited_content(response, supposed_length) + body.seek(0, SEEK_END) + actual_length = body.tell() + if actual_length != supposed_length: + # Most likely a mutable that got changed out from under us, but + # concievably could be a bug... + raise ValueError( + f"Length of response sent from server ({actual_length}) " + + f"didn't match Content-Range header ({supposed_length})" + ) + body.seek(0) + returnValue(body.read()) else: + # Technically HTTP allows sending an OK with full body under these + # circumstances, but the server is not designed to do that so we ignore + # than possibility for now... raise ClientException(response.code) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index a29742bab..4eecf7f2f 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -352,12 +352,10 @@ class _ReadRangeProducer: if self.first_read and self.remaining > 0: # For empty bodies the content-range header makes no sense since - # the end of the range is inclusive. - # - # TODO this is wrong for requests that go beyond the end of the - # share. This will be fixed in - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 by making that - # edge case not happen. + # the end of the range is inclusive. Actual conversion from + # Python's exclusive ranges to inclusive ranges is handled by + # werkzeug. The case where we're reading beyond the end of the + # share is handled by caller (read_range().) self.request.setHeader( "content-range", ContentRange( @@ -368,7 +366,7 @@ class _ReadRangeProducer: if not data and self.remaining > 0: # TODO Either data is missing locally (storage issue?) or a bug, - # abort response with error? Until + # abort response with error. Until # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 is implemented # we continue anyway. pass @@ -394,7 +392,9 @@ class _ReadRangeProducer: pass -def read_range(request: Request, read_data: ReadData) -> Union[Deferred, bytes]: +def read_range( + request: Request, read_data: ReadData, share_length: int +) -> Union[Deferred, bytes]: """ Read an optional ``Range`` header, reads data appropriately via the given callable, writes the data to the request. @@ -430,9 +430,12 @@ def read_range(request: Request, read_data: ReadData) -> Union[Deferred, bytes]: ): raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE) - # TODO if end is beyond the end of the share, either return error, or maybe - # just return what we can... offset, end = range_header.ranges[0] + # If we're being ask to read beyond the length of the share, just read + # less: + end = min(end, share_length) + # TODO when if end is now <= offset? + request.setResponseCode(http.PARTIAL_CONTENT) d = Deferred() request.registerProducer( @@ -675,7 +678,7 @@ class HTTPServer(object): request.setResponseCode(http.NOT_FOUND) return b"" - return read_range(request, bucket.read) + return read_range(request, bucket.read, bucket.get_length()) @_authorized_route( _app, @@ -763,6 +766,13 @@ class HTTPServer(object): def read_mutable_chunk(self, request, authorization, storage_index, share_number): """Read a chunk from a mutable.""" + try: + share_length = self._storage_server.get_mutable_share_length( + storage_index, share_number + ) + except KeyError: + raise _HTTPError(http.NOT_FOUND) + def read_data(offset, length): try: return self._storage_server.slot_readv( @@ -771,7 +781,7 @@ class HTTPServer(object): except KeyError: raise _HTTPError(http.NOT_FOUND) - return read_range(request, read_data) + return read_range(request, read_data, share_length) @_authorized_route( _app, set(), "/v1/mutable//shares", methods=["GET"] diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 1b9b051eb..88b650bb9 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -805,6 +805,8 @@ class StorageServer(service.MultiService): """Returns the length (in bytes) of a mutable.""" si_dir = storage_index_to_dir(storage_index) path = os.path.join(self.sharedir, si_dir, str(share_number)) + if not os.path.exists(path): + raise KeyError("No such storage index or share number") return MutableShareFile(path).get_length() From 43c6af6fde66ca49aed735aba9d927ae44e86a2a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 11:28:14 -0400 Subject: [PATCH 101/289] More error handling for edge cases. --- src/allmydata/storage/http_server.py | 42 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 4eecf7f2f..82d3d4794 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -355,7 +355,7 @@ class _ReadRangeProducer: # the end of the range is inclusive. Actual conversion from # Python's exclusive ranges to inclusive ranges is handled by # werkzeug. The case where we're reading beyond the end of the - # share is handled by caller (read_range().) + # share is handled by the caller, read_range(). self.request.setHeader( "content-range", ContentRange( @@ -365,11 +365,24 @@ class _ReadRangeProducer: self.first_read = False if not data and self.remaining > 0: - # TODO Either data is missing locally (storage issue?) or a bug, - # abort response with error. Until - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 is implemented - # we continue anyway. - pass + d, self.result = self.result, None + d.errback( + ValueError( + f"Should be {remaining} bytes left, but we got an empty read" + ) + ) + self.stopProducing() + return + + if len(data) > self.remaining: + d, self.result = self.result, None + d.errback( + ValueError( + f"Should be {remaining} bytes left, but we got more than that ({len(data)})!" + ) + ) + self.stopProducing() + return self.start += len(data) self.remaining -= len(data) @@ -377,19 +390,20 @@ class _ReadRangeProducer: self.request.write(data) - # TODO remove the second clause in https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 - if self.remaining == 0 or not data: - self.request.unregisterProducer() - d = self.result - del self.result - d.callback(b"") - return + if self.remaining == 0: + self.stopProducing() def pauseProducing(self): pass def stopProducing(self): - pass + if self.request is not None: + self.request.unregisterProducer() + self.request = None + if self.result is not None: + d = self.result + self.result = None + d.callback(b"") def read_range( From 69739f5f9bf28d6e35c8ceacafad4554056fabd5 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 11:42:01 -0400 Subject: [PATCH 102/289] Handle case where requested range results in empty response. --- docs/proposed/http-storage-node-protocol.rst | 2 ++ src/allmydata/storage/http_client.py | 6 +++- src/allmydata/storage/http_server.py | 29 +++++++++----------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 7e0b4a542..6a4e4136a 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -654,6 +654,8 @@ The ``Range`` header may be used to request exactly one ``bytes`` range, in whic Interpretation and response behavior is as specified in RFC 7233 § 4.1. Multiple ranges in a single request are *not* supported; open-ended ranges are also not supported. +If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. + Discussion `````````` diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 11c9ab2fc..236ec970f 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -468,6 +468,10 @@ def read_share_chunk( {"range": [Range("bytes", [(offset, offset + length)]).to_header()]} ), ) + + if response.code == http.NO_CONTENT: + return b"" + if response.code == http.PARTIAL_CONTENT: content_range = parse_content_range_header( response.headers.getRawHeaders("content-range")[0] @@ -488,7 +492,7 @@ def read_share_chunk( + f"didn't match Content-Range header ({supposed_length})" ) body.seek(0) - returnValue(body.read()) + return body.read() else: # Technically HTTP allows sending an OK with full body under these # circumstances, but the server is not designed to do that so we ignore diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 82d3d4794..cb55afffe 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -343,27 +343,12 @@ class _ReadRangeProducer: result: Deferred start: int remaining: int - first_read: bool = field(default=True) def resumeProducing(self): to_read = min(self.remaining, 65536) data = self.read_data(self.start, to_read) assert len(data) <= to_read - if self.first_read and self.remaining > 0: - # For empty bodies the content-range header makes no sense since - # the end of the range is inclusive. Actual conversion from - # Python's exclusive ranges to inclusive ranges is handled by - # werkzeug. The case where we're reading beyond the end of the - # share is handled by the caller, read_range(). - self.request.setHeader( - "content-range", - ContentRange( - "bytes", self.start, self.start + self.remaining - ).to_header(), - ) - self.first_read = False - if not data and self.remaining > 0: d, self.result = self.result, None d.errback( @@ -448,9 +433,21 @@ def read_range( # If we're being ask to read beyond the length of the share, just read # less: end = min(end, share_length) - # TODO when if end is now <= offset? + if offset >= end: + # Basically we'd need to return an empty body. However, the + # Content-Range header can't actually represent empty lengths... so + # (mis)use 204 response code to indicate that. + raise _HTTPError(http.NO_CONTENT) request.setResponseCode(http.PARTIAL_CONTENT) + + # Actual conversion from Python's exclusive ranges to inclusive ranges is + # handled by werkzeug. + request.setHeader( + "content-range", + ContentRange("bytes", offset, end).to_header(), + ) + d = Deferred() request.registerProducer( _ReadRangeProducer( From 92392501a7439c582599cc7cc36a6270926770c1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 11:47:15 -0400 Subject: [PATCH 103/289] Expand spec. --- docs/proposed/http-storage-node-protocol.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 6a4e4136a..09b523c87 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -654,6 +654,9 @@ The ``Range`` header may be used to request exactly one ``bytes`` range, in whic Interpretation and response behavior is as specified in RFC 7233 § 4.1. Multiple ranges in a single request are *not* supported; open-ended ranges are also not supported. +If the response reads beyond the end fo the data, the response may be shorter than then requested range. +The resulting ``Content-Range`` header will be consistent with the returned data. + If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. Discussion @@ -756,6 +759,11 @@ The ``Range`` header may be used to request exactly one ``bytes`` range, in whic Interpretation and response behavior is as specified in RFC 7233 § 4.1. Multiple ranges in a single request are *not* supported; open-ended ranges are also not supported. +If the response reads beyond the end fo the data, the response may be shorter than then requested range. +The resulting ``Content-Range`` header will be consistent with the returned data. + +If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. + ``POST /v1/mutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! From da8a36fac9af1f37eb34bf7ab0a46253e906980b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 12:07:46 -0400 Subject: [PATCH 104/289] Improve test coverage. --- src/allmydata/test/test_storage.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index bb8d48d2f..c3f2a35e1 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -689,13 +689,17 @@ class Server(unittest.TestCase): self.failUnlessEqual(ss.allocated_size(), 0) def test_immutable_length(self): - """``get_immutable_share_length()`` returns the length of an immutable share.""" + """ + ``get_immutable_share_length()`` returns the length of an immutable + share, as does ``BucketWriter.get_length()``.. + """ ss = self.create("test_immutable_length") _, writers = self.allocate(ss, b"allocate", [22], 75) bucket = writers[22] bucket.write(0, b"X" * 75) bucket.close() self.assertEqual(ss.get_immutable_share_length(b"allocate", 22), 75) + self.assertEqual(ss.get_buckets(b"allocate")[22].get_length(), 75) def test_allocate(self): ss = self.create("test_allocate") @@ -1362,6 +1366,26 @@ class MutableServer(unittest.TestCase): ) self.assertEqual(ss.get_mutable_share_length(b"si1", 16), 23) + def test_mutable_share_length_unknown(self): + """ + ``get_mutable_share_length()`` raises a ``KeyError`` on unknown shares. + """ + ss = self.create("test_mutable_share_length_unknown") + self.allocate(ss, b"si1", b"we1", b"le1", [16], 23) + ss.slot_testv_and_readv_and_writev( + b"si1", (self.write_enabler(b"we1"), + self.renew_secret(b"le1"), + self.cancel_secret(b"le1")), + {16: ([], [(0, b"x" * 23)], None)}, + [] + ) + with self.assertRaises(KeyError): + # Wrong share number. + ss.get_mutable_share_length(b"si1", 17) + with self.assertRaises(KeyError): + # Wrong storage index + ss.get_mutable_share_length(b"unknown", 16) + def test_bad_magic(self): ss = self.create("test_bad_magic") self.allocate(ss, b"si1", b"we1", next(self._lease_secret), set([0]), 10) From d85b20b62d92d9cde835e1fb2438660f5e725962 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 12:47:18 -0400 Subject: [PATCH 105/289] Fix lint. --- src/allmydata/storage/http_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index cb55afffe..68d0740b1 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -353,7 +353,7 @@ class _ReadRangeProducer: d, self.result = self.result, None d.errback( ValueError( - f"Should be {remaining} bytes left, but we got an empty read" + f"Should be {self.remaining} bytes left, but we got an empty read" ) ) self.stopProducing() @@ -363,7 +363,7 @@ class _ReadRangeProducer: d, self.result = self.result, None d.errback( ValueError( - f"Should be {remaining} bytes left, but we got more than that ({len(data)})!" + f"Should be {self.remaining} bytes left, but we got more than that ({len(data)})!" ) ) self.stopProducing() From 3b7345205bbb68a79f079b7c619ca2a912c7c918 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 14:24:10 -0400 Subject: [PATCH 106/289] News file. --- newsfragments/3709.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3709.minor diff --git a/newsfragments/3709.minor b/newsfragments/3709.minor new file mode 100644 index 000000000..e69de29bb From 11f4ebc0d90ed80f612d29945cd2436f43f658ea Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 15:12:00 -0400 Subject: [PATCH 107/289] Hook up NURL generation to the new Foolscap/HTTPS protocol switch. --- src/allmydata/client.py | 16 ++++++++ src/allmydata/protocol_switch.py | 8 ---- src/allmydata/storage/http_server.py | 46 +++++++++++++++-------- src/allmydata/test/test_istorageserver.py | 33 +++------------- 4 files changed, 52 insertions(+), 51 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index e737f93e6..3318bbfa4 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -37,6 +37,7 @@ import allmydata from allmydata.crypto import rsa, ed25519 from allmydata.crypto.util import remove_prefix from allmydata.storage.server import StorageServer, FoolscapStorageServer +from allmydata.storage.http_server import build_nurl from allmydata import storage_client from allmydata.immutable.upload import Uploader from allmydata.immutable.offloaded import Helper @@ -658,6 +659,12 @@ class _Client(node.Node, pollmixin.PollMixin): if webport: self.init_web(webport) # strports string + # TODO this may be the wrong location for now? but as temporary measure + # it allows us to get NURLs for testing in test_istorageserver.py Will + # eventually get fixed one way or another in + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901 + self.storage_nurls = [] + def init_stats_provider(self): self.stats_provider = StatsProvider(self) self.stats_provider.setServiceParent(self) @@ -820,6 +827,15 @@ class _Client(node.Node, pollmixin.PollMixin): furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) + for location_hint in self.tub.locationHints: + if location_hint.startswith("tcp:"): + _, hostname, port = location_hint.split(":") + port = int(port) + self.storage_nurls.append( + build_nurl( + hostname, port, swissnum, self.tub.myCertificate.original.to_cryptography() + ) + ) announcement["anonymous-storage-FURL"] = furl diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 9f33560e7..21d896793 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -66,14 +66,6 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): Add the various storage server-related attributes needed by a ``Tub``-specific ``_FoolscapOrHttps`` subclass. """ - # TODO tub.locationHints will be in the format ["tcp:hostname:port"] - # (and maybe some other things we can ignore for now). We also have - # access to the certificate. Together, this should be sufficient to - # construct NURLs, one per hint. The code for NURls should be - # refactored out of http_server.py's build_nurl; that code might want - # to skip around for the future when we don't do foolscap, but for now - # this module will be main way we set up HTTPS. - # Tub.myCertificate is a twisted.internet.ssl.PrivateCertificate # instance. certificate_options = CertificateOptions( diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index e2b754b0d..7f7c1c0ae 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -10,6 +10,7 @@ from base64 import b64decode import binascii from tempfile import TemporaryFile +from cryptography.x509 import Certificate from zope.interface import implementer from klein import Klein from twisted.web import http @@ -843,6 +844,29 @@ class _TLSEndpointWrapper(object): ) +def build_nurl( + hostname: str, port: int, swissnum: str, certificate: Certificate +) -> DecodedURL: + """ + Construct a HTTPS NURL, given the hostname, port, server swissnum, and x509 + certificate for the server. Clients can then connect to the server using + this NURL. + """ + return DecodedURL().replace( + fragment="v=1", # how we know this NURL is HTTP-based (i.e. not Foolscap) + host=hostname, + port=port, + path=(swissnum,), + userinfo=( + str( + get_spki_hash(certificate), + "ascii", + ), + ), + scheme="pb", + ) + + def listen_tls( server: HTTPServer, hostname: str, @@ -862,22 +886,14 @@ def listen_tls( """ endpoint = _TLSEndpointWrapper.from_paths(endpoint, private_key_path, cert_path) - def build_nurl(listening_port: IListeningPort) -> DecodedURL: - nurl = DecodedURL().replace( - fragment="v=1", # how we know this NURL is HTTP-based (i.e. not Foolscap) - host=hostname, - port=listening_port.getHost().port, - path=(str(server._swissnum, "ascii"),), - userinfo=( - str( - get_spki_hash(load_pem_x509_certificate(cert_path.getContent())), - "ascii", - ), - ), - scheme="pb", + def get_nurl(listening_port: IListeningPort) -> DecodedURL: + return build_nurl( + hostname, + listening_port.getHost().port, + str(server._swissnum, "ascii"), + load_pem_x509_certificate(cert_path.getContent()), ) - return nurl return endpoint.listen(Site(server.get_resource())).addCallback( - lambda listening_port: (build_nurl(listening_port), listening_port) + lambda listening_port: (get_nurl(listening_port), listening_port) ) diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index 39675336f..12a3cba55 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -1084,40 +1084,17 @@ class _FoolscapMixin(_SharedMixin): class _HTTPMixin(_SharedMixin): """Run tests on the HTTP version of ``IStorageServer``.""" - def setUp(self): - self._port_assigner = SameProcessStreamEndpointAssigner() - self._port_assigner.setUp() - self.addCleanup(self._port_assigner.tearDown) - return _SharedMixin.setUp(self) - - @inlineCallbacks def _get_istorage_server(self): - swissnum = b"1234" - http_storage_server = HTTPServer(self.server, swissnum) - - # Listen on randomly assigned port, using self-signed cert: - private_key = generate_private_key() - certificate = generate_certificate(private_key) - _, endpoint_string = self._port_assigner.assign(reactor) - nurl, listening_port = yield listen_tls( - http_storage_server, - "127.0.0.1", - serverFromString(reactor, endpoint_string), - private_key_to_file(FilePath(self.mktemp()), private_key), - cert_to_file(FilePath(self.mktemp()), certificate), - ) - self.addCleanup(listening_port.stopListening) + nurl = self.clients[0].storage_nurls[0] # Create HTTP client with non-persistent connections, so we don't leak # state across tests: - returnValue( - _HTTPStorageServer.from_http_client( - StorageClient.from_nurl(nurl, reactor, persistent=False) - ) + client: IStorageServer = _HTTPStorageServer.from_http_client( + StorageClient.from_nurl(nurl, reactor, persistent=False) ) + self.assertTrue(IStorageServer.providedBy(client)) - # Eventually should also: - # self.assertTrue(IStorageServer.providedBy(client)) + return succeed(client) class FoolscapSharedAPIsTests( From 981b693402929dbe24f4f48cc5a42bb7b95b3285 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 20 Jul 2022 15:25:22 -0400 Subject: [PATCH 108/289] Make HTTPS protocols work with the protocol switcher magic. --- src/allmydata/protocol_switch.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 21d896793..d3e68f860 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -138,6 +138,13 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): protocol = self.https_factory.buildProtocol(self.transport.getPeer()) protocol.makeConnection(self.transport) protocol.dataReceived(self._buffer) + + # Update the factory so it knows we're transforming to a new + # protocol object (we'll do that next) + value = self.https_factory.protocols.pop(protocol) + self.https_factory.protocols[self] = value + + # Transform self into the TLS protocol 🪄 self.__class__ = protocol.__class__ self.__dict__ = protocol.__dict__ From 757e8c418c62864246e360be05c9cb25654d9c2a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:51:26 -0400 Subject: [PATCH 109/289] Fix typos. --- docs/proposed/http-storage-node-protocol.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 09b523c87..3dac376ff 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -654,7 +654,7 @@ The ``Range`` header may be used to request exactly one ``bytes`` range, in whic Interpretation and response behavior is as specified in RFC 7233 § 4.1. Multiple ranges in a single request are *not* supported; open-ended ranges are also not supported. -If the response reads beyond the end fo the data, the response may be shorter than then requested range. +If the response reads beyond the end of the data, the response may be shorter than the requested range. The resulting ``Content-Range`` header will be consistent with the returned data. If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. @@ -759,7 +759,7 @@ The ``Range`` header may be used to request exactly one ``bytes`` range, in whic Interpretation and response behavior is as specified in RFC 7233 § 4.1. Multiple ranges in a single request are *not* supported; open-ended ranges are also not supported. -If the response reads beyond the end fo the data, the response may be shorter than then requested range. +If the response reads beyond the end of the data, the response may be shorter than the requested range. The resulting ``Content-Range`` header will be consistent with the returned data. If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. From 5cd9ccfc6ae78be55eca1931402fd512d6199787 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:52:56 -0400 Subject: [PATCH 110/289] Slightly nicer handling for bad edge cases. --- src/allmydata/storage/http_client.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 236ec970f..a464d445a 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -474,8 +474,16 @@ def read_share_chunk( if response.code == http.PARTIAL_CONTENT: content_range = parse_content_range_header( - response.headers.getRawHeaders("content-range")[0] + response.headers.getRawHeaders("content-range")[0] or "" ) + if ( + content_range is None + or content_range.stop is None + or content_range.start is None + ): + raise ValueError( + "Content-Range was missing, invalid, or in format we don't support" + ) supposed_length = content_range.stop - content_range.start if supposed_length > length: raise ValueError("Server sent more than we asked for?!") From f671b47a6decb0f53b49697340012119286d0f91 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:53:12 -0400 Subject: [PATCH 111/289] Fix typo. --- src/allmydata/storage/http_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index a464d445a..da272240d 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -504,7 +504,7 @@ def read_share_chunk( else: # Technically HTTP allows sending an OK with full body under these # circumstances, but the server is not designed to do that so we ignore - # than possibility for now... + # that possibility for now... raise ClientException(response.code) From 36b96a8776a39d77f017725f2ca4ccb9e4f8cf5c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:53:28 -0400 Subject: [PATCH 112/289] Fix typo. --- src/allmydata/storage/http_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index da272240d..a2dc5379f 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -494,7 +494,7 @@ def read_share_chunk( actual_length = body.tell() if actual_length != supposed_length: # Most likely a mutable that got changed out from under us, but - # concievably could be a bug... + # conceivably could be a bug... raise ValueError( f"Length of response sent from server ({actual_length}) " + f"didn't match Content-Range header ({supposed_length})" From 2b3a8ddeece0e11d095e8c350a2fef66f4deee20 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:55:00 -0400 Subject: [PATCH 113/289] Docstring. --- src/allmydata/storage/mutable.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 9a99979e9..51c3a3c8b 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -413,6 +413,9 @@ class MutableShareFile(object): return datav def get_length(self): + """ + Return the length of the data in the share. + """ f = open(self.home, 'rb') data_length = self._read_data_length(f) f.close() From be963e2324c52801998796f0ba2cd931a4bf556f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:55:33 -0400 Subject: [PATCH 114/289] Docstrings. --- src/allmydata/storage/immutable.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 2c65304b8..0338af41c 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -204,6 +204,9 @@ class ShareFile(object): self._data_offset = 0xc def get_length(self): + """ + Return the length of the data in the share, if we're reading. + """ return self._length def unlink(self): @@ -549,9 +552,6 @@ class BucketReader(object): self.shnum, reason) - def get_length(self): - return self._share_file.get_length() - @implementer(RIBucketReader) class FoolscapBucketReader(Referenceable): # type: ignore # warner/foolscap#78 From 83f9c0788b1fca812048fe4cafa912f97d664501 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:56:18 -0400 Subject: [PATCH 115/289] Use more direct API. --- src/allmydata/storage/server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 88b650bb9..2bf99d74c 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -798,8 +798,7 @@ class StorageServer(service.MultiService): """Returns the length (in bytes) of an immutable.""" si_dir = storage_index_to_dir(storage_index) path = os.path.join(self.sharedir, si_dir, str(share_number)) - bucket = BucketReader(self, path, storage_index, share_number) - return bucket.get_length() + return ShareFile(path).get_length() def get_mutable_share_length(self, storage_index: bytes, share_number: int) -> int: """Returns the length (in bytes) of a mutable.""" From 94e0568653a2fc49e33ba4be8c1f86b82aa48737 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jul 2022 11:57:32 -0400 Subject: [PATCH 116/289] Actually we do need it. --- src/allmydata/storage/immutable.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 0338af41c..f7f5aebce 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -552,6 +552,12 @@ class BucketReader(object): self.shnum, reason) + def get_length(self): + """ + Return the length of the data in the share. + """ + return self._share_file.get_length() + @implementer(RIBucketReader) class FoolscapBucketReader(Referenceable): # type: ignore # warner/foolscap#78 From c14463ac6df6c1d0b4f3a44dd5548de8128c214f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 25 Jul 2022 09:52:40 -0400 Subject: [PATCH 117/289] News file. --- newsfragments/3909.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3909.minor diff --git a/newsfragments/3909.minor b/newsfragments/3909.minor new file mode 100644 index 000000000..e69de29bb From 921e3a771248c24b54e77c9c8d44cc488d572906 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 25 Jul 2022 09:55:03 -0400 Subject: [PATCH 118/289] Don't use broken version of werkzeug. --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d07031cd9..c3ee4eb90 100644 --- a/setup.py +++ b/setup.py @@ -133,7 +133,8 @@ install_requires = [ # HTTP server and client "klein", - "werkzeug", + # 2.2.0 has a bug: https://github.com/pallets/werkzeug/issues/2465 + "werkzeug != 2.2.0", "treq", "cbor2", "pycddl", From 822b652d99296a1d767f88446b2825e09caf8b65 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 29 Jul 2022 09:57:18 -0400 Subject: [PATCH 119/289] Improve factoring. --- src/allmydata/client.py | 21 +++++----------- src/allmydata/protocol_switch.py | 30 ++++++++++++++++++++--- src/allmydata/test/test_istorageserver.py | 2 +- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 3318bbfa4..9938ec076 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -37,7 +37,6 @@ import allmydata from allmydata.crypto import rsa, ed25519 from allmydata.crypto.util import remove_prefix from allmydata.storage.server import StorageServer, FoolscapStorageServer -from allmydata.storage.http_server import build_nurl from allmydata import storage_client from allmydata.immutable.upload import Uploader from allmydata.immutable.offloaded import Helper @@ -660,10 +659,10 @@ class _Client(node.Node, pollmixin.PollMixin): self.init_web(webport) # strports string # TODO this may be the wrong location for now? but as temporary measure - # it allows us to get NURLs for testing in test_istorageserver.py Will - # eventually get fixed one way or another in + # it allows us to get NURLs for testing in test_istorageserver.py. This + # will eventually get fixed one way or another in # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901 - self.storage_nurls = [] + self.storage_nurls = set() def init_stats_provider(self): self.stats_provider = StatsProvider(self) @@ -826,17 +825,9 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) - for location_hint in self.tub.locationHints: - if location_hint.startswith("tcp:"): - _, hostname, port = location_hint.split(":") - port = int(port) - self.storage_nurls.append( - build_nurl( - hostname, port, swissnum, self.tub.myCertificate.original.to_cryptography() - ) - ) - + self.storage_nurls.update( + self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) + ) announcement["anonymous-storage-FURL"] = furl enabled_storage_servers = self._enable_storage_servers( diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index d3e68f860..f1fa6e061 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -12,6 +12,8 @@ relevant information for a storage server once it becomes available later in the configuration process. """ +from __future__ import annotations + from twisted.internet.protocol import Protocol from twisted.internet.interfaces import IDelayedCall from twisted.internet.ssl import CertificateOptions @@ -19,10 +21,11 @@ from twisted.web.server import Site from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.internet import reactor +from hyperlink import DecodedURL from foolscap.negotiate import Negotiation from foolscap.api import Tub -from .storage.http_server import HTTPServer +from .storage.http_server import HTTPServer, build_nurl from .storage.server import StorageServer @@ -45,7 +48,9 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): since these are created by Foolscap's ``Tub``, by setting this to be the tub's ``negotiationClass``. - Do not use directly; this needs to be subclassed per ``Tub``. + Do not use directly, use ``support_foolscap_and_https(tub)`` instead. The + way this class works is that a new subclass is created for a specific + ``Tub`` instance. """ # These will be set by support_foolscap_and_https() and add_storage_server(). @@ -61,10 +66,14 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): _timeout: IDelayedCall @classmethod - def add_storage_server(cls, storage_server: StorageServer, swissnum): + def add_storage_server( + cls, storage_server: StorageServer, swissnum: bytes + ) -> set[DecodedURL]: """ Add the various storage server-related attributes needed by a ``Tub``-specific ``_FoolscapOrHttps`` subclass. + + Returns the resulting NURLs. """ # Tub.myCertificate is a twisted.internet.ssl.PrivateCertificate # instance. @@ -80,6 +89,21 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): Site(cls.http_storage_server.get_resource()), ) + storage_nurls = set() + for location_hint in cls.tub.locationHints: + if location_hint.startswith("tcp:"): + _, hostname, port = location_hint.split(":") + port = int(port) + storage_nurls.add( + build_nurl( + hostname, + port, + str(swissnum, "ascii"), + cls.tub.myCertificate.original.to_cryptography(), + ) + ) + return storage_nurls + def __init__(self, *args, **kwargs): self._foolscap: Negotiation = Negotiation(*args, **kwargs) self._buffer: bytes = b"" diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index 12a3cba55..90159f1f8 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -1085,7 +1085,7 @@ class _HTTPMixin(_SharedMixin): """Run tests on the HTTP version of ``IStorageServer``.""" def _get_istorage_server(self): - nurl = self.clients[0].storage_nurls[0] + nurl = list(self.clients[0].storage_nurls)[0] # Create HTTP client with non-persistent connections, so we don't leak # state across tests: From 34518f9d0dcb8fcb91382beb1ead726b811c5dff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 29 Jul 2022 10:01:09 -0400 Subject: [PATCH 120/289] Fix lints. --- src/allmydata/storage/http_server.py | 4 ++-- src/allmydata/test/test_istorageserver.py | 15 ++++----------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 98611e833..ca8917694 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -10,7 +10,7 @@ from base64 import b64decode import binascii from tempfile import TemporaryFile -from cryptography.x509 import Certificate +from cryptography.x509 import Certificate as CryptoCertificate from zope.interface import implementer from klein import Klein from twisted.web import http @@ -866,7 +866,7 @@ class _TLSEndpointWrapper(object): def build_nurl( - hostname: str, port: int, swissnum: str, certificate: Certificate + hostname: str, port: int, swissnum: str, certificate: CryptoCertificate ) -> DecodedURL: """ Construct a HTTPS NURL, given the hostname, port, server swissnum, and x509 diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index 90159f1f8..3328ea598 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -18,21 +18,14 @@ from unittest import SkipTest from twisted.internet.defer import inlineCallbacks, returnValue, succeed from twisted.internet.task import Clock from twisted.internet import reactor -from twisted.internet.endpoints import serverFromString -from twisted.python.filepath import FilePath from foolscap.api import Referenceable, RemoteException -from allmydata.interfaces import IStorageServer # really, IStorageClient +# A better name for this would be IStorageClient... +from allmydata.interfaces import IStorageServer + from .common_system import SystemTestMixin -from .common import AsyncTestCase, SameProcessStreamEndpointAssigner -from .certs import ( - generate_certificate, - generate_private_key, - private_key_to_file, - cert_to_file, -) +from .common import AsyncTestCase from allmydata.storage.server import StorageServer # not a IStorageServer!! -from allmydata.storage.http_server import HTTPServer, listen_tls from allmydata.storage.http_client import StorageClient from allmydata.storage_client import _HTTPStorageServer From 1cd2185be75e3d2e35307c43e59ab803ebb52ab3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 29 Jul 2022 10:12:24 -0400 Subject: [PATCH 121/289] More cleanups. --- src/allmydata/protocol_switch.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index f1fa6e061..2e9d404c5 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -48,21 +48,20 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): since these are created by Foolscap's ``Tub``, by setting this to be the tub's ``negotiationClass``. - Do not use directly, use ``support_foolscap_and_https(tub)`` instead. The - way this class works is that a new subclass is created for a specific - ``Tub`` instance. + Do not instantiate directly, use ``support_foolscap_and_https(tub)`` + instead. The way this class works is that a new subclass is created for a + specific ``Tub`` instance. """ - # These will be set by support_foolscap_and_https() and add_storage_server(). + # These are class attributes; they will be set by + # support_foolscap_and_https() and add_storage_server(). - # The HTTP storage server API we're exposing. - http_storage_server: HTTPServer - # The Twisted HTTPS protocol factory wrapping the storage server API: + # The Twisted HTTPS protocol factory wrapping the storage server HTTP API: https_factory: TLSMemoryBIOFactory # The tub that created us: tub: Tub - # This will be created by the instance in connectionMade(): + # This is an instance attribute; it will be set in connectionMade(). _timeout: IDelayedCall @classmethod @@ -70,11 +69,17 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): cls, storage_server: StorageServer, swissnum: bytes ) -> set[DecodedURL]: """ - Add the various storage server-related attributes needed by a - ``Tub``-specific ``_FoolscapOrHttps`` subclass. + Update a ``_FoolscapOrHttps`` subclass for a specific ``Tub`` instance + with the class attributes it requires for a specific storage server. Returns the resulting NURLs. """ + # We need to be a subclass: + assert cls != _FoolscapOrHttps + # The tub instance must already be set: + assert hasattr(cls, "tub") + assert isinstance(cls.tub, Tub) + # Tub.myCertificate is a twisted.internet.ssl.PrivateCertificate # instance. certificate_options = CertificateOptions( @@ -82,11 +87,11 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): certificate=cls.tub.myCertificate.original, ) - cls.http_storage_server = HTTPServer(storage_server, swissnum) + http_storage_server = HTTPServer(storage_server, swissnum) cls.https_factory = TLSMemoryBIOFactory( certificate_options, False, - Site(cls.http_storage_server.get_resource()), + Site(http_storage_server.get_resource()), ) storage_nurls = set() From 533d2a7ac9576734825822f0621d8db58cb36606 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 29 Jul 2022 10:15:23 -0400 Subject: [PATCH 122/289] Note Tor and I2P support. --- src/allmydata/protocol_switch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 2e9d404c5..5ab4761c6 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -107,6 +107,10 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): cls.tub.myCertificate.original.to_cryptography(), ) ) + # TODO this is probably where we'll have to support Tor and I2P? + # See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3888#comment:9 + # for discussion (there will be separate tickets added for those at + # some point.) return storage_nurls def __init__(self, *args, **kwargs): From d4c73f19fe6eaea7bba25c51e632aef441d7549e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 29 Jul 2022 10:42:56 -0400 Subject: [PATCH 123/289] A unittest for the metaclass. --- src/allmydata/protocol_switch.py | 10 +++-- src/allmydata/test/test_protocol_switch.py | 43 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 src/allmydata/test/test_protocol_switch.py diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 5ab4761c6..5143cab6a 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -31,13 +31,15 @@ from .storage.server import StorageServer class _PretendToBeNegotiation(type): """ - Metaclass that allows ``_FoolscapOrHttps`` to pretend to be a ``Negotiation`` - instance, since Foolscap has some ``assert isinstance(protocol, - Negotiation`` checks. + Metaclass that allows ``_FoolscapOrHttps`` to pretend to be a + ``Negotiation`` instance, since Foolscap does some checks like + ``assert isinstance(protocol, tub.negotiationClass)`` in its internals, + and sometimes that ``protocol`` is a ``_FoolscapOrHttps`` instance, but + sometimes it's a ``Negotiation`` instance. """ def __instancecheck__(self, instance): - return (instance.__class__ == self) or isinstance(instance, Negotiation) + return issubclass(instance.__class__, self) or isinstance(instance, Negotiation) class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): diff --git a/src/allmydata/test/test_protocol_switch.py b/src/allmydata/test/test_protocol_switch.py new file mode 100644 index 000000000..4906896dc --- /dev/null +++ b/src/allmydata/test/test_protocol_switch.py @@ -0,0 +1,43 @@ +""" +Unit tests for ``allmydata.protocol_switch``. + +By its nature, most of the testing needs to be end-to-end; essentially any test +that uses real Foolscap (``test_system.py``, integration tests) ensures +Foolscap still works. ``test_istorageserver.py`` tests the HTTP support. +""" + +from foolscap.negotiate import Negotiation + +from .common import TestCase +from ..protocol_switch import _PretendToBeNegotiation + + +class UtilityTests(TestCase): + """Tests for utilities in the protocol switch code.""" + + def test_metaclass(self): + """ + A class that has the ``_PretendToBeNegotiation`` metaclass will support + ``isinstance()``'s normal semantics on its own instances, but will also + indicate that ``Negotiation`` instances are its instances. + """ + + class Parent(metaclass=_PretendToBeNegotiation): + pass + + class Child(Parent): + pass + + class Other: + pass + + p = Parent() + self.assertIsInstance(p, Parent) + self.assertIsInstance(Negotiation(), Parent) + self.assertNotIsInstance(Other(), Parent) + + c = Child() + self.assertIsInstance(c, Child) + self.assertIsInstance(c, Parent) + self.assertIsInstance(Negotiation(), Child) + self.assertNotIsInstance(Other(), Child) From 8b3280bf319c68ba1eb957ee9048718070267c8d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 29 Jul 2022 10:51:17 -0400 Subject: [PATCH 124/289] Simplify more. --- src/allmydata/protocol_switch.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 5143cab6a..158df32b5 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -63,9 +63,6 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): # The tub that created us: tub: Tub - # This is an instance attribute; it will be set in connectionMade(). - _timeout: IDelayedCall - @classmethod def add_storage_server( cls, storage_server: StorageServer, swissnum: bytes @@ -117,7 +114,6 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): def __init__(self, *args, **kwargs): self._foolscap: Negotiation = Negotiation(*args, **kwargs) - self._buffer: bytes = b"" def __setattr__(self, name, value): if name in {"_foolscap", "_buffer", "transport", "__class__", "_timeout"}: @@ -139,12 +135,15 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): # After creation, a Negotiation instance either has initClient() or # initServer() called. Since this is a client, we're never going to do # HTTP, so we can immediately become a Negotiation instance. - assert not self._buffer + assert not hasattr(self, "_buffer") self._convert_to_negotiation() return self.initClient(*args, **kwargs) def connectionMade(self): - self._timeout = reactor.callLater(30, self.transport.abortConnection) + self._buffer: bytes = b"" + self._timeout: IDelayedCall = reactor.callLater( + 30, self.transport.abortConnection + ) def dataReceived(self, data: bytes) -> None: """Handle incoming data. From 709f139c85e00f452ca5dacb308fd3494eb1be4a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Aug 2022 15:51:30 -0400 Subject: [PATCH 125/289] Start refactoring to enable HTTP storage client. --- src/allmydata/storage_client.py | 183 ++++++++++++++++++++++++++------ 1 file changed, 151 insertions(+), 32 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index c63bfccff..a058ae828 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -30,6 +30,8 @@ Ported to Python 3. # # 6: implement other sorts of IStorageClient classes: S3, etc +from __future__ import annotations + from six import ensure_text from typing import Union import re, time, hashlib @@ -523,6 +525,45 @@ class IFoolscapStorageServer(Interface): """ +def _parse_announcement(server_id: bytes, furl: bytes, ann: dict) -> tuple[str, bytes, bytes, bytes, bytes]: + """ + Parse the furl and announcement, return: + + (nickname, permutation_seed, tubid, short_description, long_description) + """ + m = re.match(br'pb://(\w+)@', furl) + assert m, furl + tubid_s = m.group(1).lower() + tubid = base32.a2b(tubid_s) + if "permutation-seed-base32" in ann: + seed = ann["permutation-seed-base32"] + if isinstance(seed, str): + seed = seed.encode("utf-8") + ps = base32.a2b(seed) + elif re.search(br'^v0-[0-9a-zA-Z]{52}$', server_id): + ps = base32.a2b(server_id[3:]) + else: + log.msg("unable to parse serverid '%(server_id)s as pubkey, " + "hashing it to get permutation-seed, " + "may not converge with other clients", + server_id=server_id, + facility="tahoe.storage_broker", + level=log.UNUSUAL, umid="qu86tw") + ps = hashlib.sha256(server_id).digest() + permutation_seed = ps + + assert server_id + long_description = server_id + if server_id.startswith(b"v0-"): + # remove v0- prefix from abbreviated name + short_description = server_id[3:3+8] + else: + short_description = server_id[:8] + nickname = ann.get("nickname", "") + + return (nickname, permutation_seed, tubid, short_description, long_description) + + @implementer(IFoolscapStorageServer) @attr.s(frozen=True) class _FoolscapStorage(object): @@ -566,43 +607,13 @@ class _FoolscapStorage(object): The furl will be a Unicode string on Python 3; on Python 2 it will be either a native (bytes) string or a Unicode string. """ - furl = furl.encode("utf-8") - m = re.match(br'pb://(\w+)@', furl) - assert m, furl - tubid_s = m.group(1).lower() - tubid = base32.a2b(tubid_s) - if "permutation-seed-base32" in ann: - seed = ann["permutation-seed-base32"] - if isinstance(seed, str): - seed = seed.encode("utf-8") - ps = base32.a2b(seed) - elif re.search(br'^v0-[0-9a-zA-Z]{52}$', server_id): - ps = base32.a2b(server_id[3:]) - else: - log.msg("unable to parse serverid '%(server_id)s as pubkey, " - "hashing it to get permutation-seed, " - "may not converge with other clients", - server_id=server_id, - facility="tahoe.storage_broker", - level=log.UNUSUAL, umid="qu86tw") - ps = hashlib.sha256(server_id).digest() - permutation_seed = ps - - assert server_id - long_description = server_id - if server_id.startswith(b"v0-"): - # remove v0- prefix from abbreviated name - short_description = server_id[3:3+8] - else: - short_description = server_id[:8] - nickname = ann.get("nickname", "") - + (nickname, permutation_seed, tubid, short_description, long_description) = _parse_announcement(server_id, furl.encode("utf-8"), ann) return cls( nickname=nickname, permutation_seed=permutation_seed, tubid=tubid, storage_server=storage_server, - furl=furl, + furl=furl.encode("utf-8"), short_description=short_description, long_description=long_description, ) @@ -910,6 +921,114 @@ class NativeStorageServer(service.MultiService): # used when the broker wants us to hurry up self._reconnector.reset() + +@implementer(IServer) +class HTTPNativeStorageServer(service.MultiService): + """ + Like ``NativeStorageServer``, but for HTTP clients. + + The notion of being "connected" is less meaningful for HTTP; we just poll + occasionally, and if we've succeeded at last poll, we assume we're + "connected". + """ + + def __init__(self, server_id: bytes, announcement): + service.MultiService.__init__(self) + assert isinstance(server_id, bytes) + self._server_id = server_id + self.announcement = announcement + self._on_status_changed = ObserverList() + furl = announcement["anonymous-storage-FURL"].encode("utf-8") + self._nickname, self._permutation_seed, self._tubid, self._short_description, self._long_description = _parse_announcement(server_id, furl, announcement) + + def get_permutation_seed(self): + return self._permutation_seed + + def get_name(self): # keep methodname short + return self._name + + def get_longname(self): + return self._longname + + def get_tubid(self): + return self._tubid + + def get_lease_seed(self): + return self._lease_seed + + def get_foolscap_write_enabler_seed(self): + return self._tubid + + def get_nickname(self): + return self._nickname + + def on_status_changed(self, status_changed): + """ + :param status_changed: a callable taking a single arg (the + NativeStorageServer) that is notified when we become connected + """ + return self._on_status_changed.subscribe(status_changed) + + # Special methods used by copy.copy() and copy.deepcopy(). When those are + # used in allmydata.immutable.filenode to copy CheckResults during + # repair, we want it to treat the IServer instances as singletons, and + # not attempt to duplicate them.. + def __copy__(self): + return self + + def __deepcopy__(self, memodict): + return self + + def __repr__(self): + return "" % self.get_name() + + def get_serverid(self): + return self._server_id + + def get_version(self): + pass + + def get_announcement(self): + return self.announcement + + def get_connection_status(self): + pass + + def is_connected(self): + pass + + def get_available_space(self): + # TODO refactor into shared utility with NativeStorageServer + version = self.get_version() + if version is None: + return None + protocol_v1_version = version.get(b'http://allmydata.org/tahoe/protocols/storage/v1', BytesKeyDict()) + available_space = protocol_v1_version.get(b'available-space') + if available_space is None: + available_space = protocol_v1_version.get(b'maximum-immutable-share-size', None) + return available_space + + def start_connecting(self, trigger_cb): + pass + + def get_rref(self): + # TODO UH + pass + + def get_storage_server(self): + """ + See ``IServer.get_storage_server``. + """ + + def stop_connecting(self): + # used when this descriptor has been superceded by another + pass + + def try_to_connect(self): + # used when the broker wants us to hurry up + pass + + class UnknownServerTypeError(Exception): pass From c3e41588130e9d2c9d65965a9ea06d8f3503bd52 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Aug 2022 15:55:14 -0400 Subject: [PATCH 126/289] Remove duplication. --- src/allmydata/storage_client.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index a058ae828..e64f63413 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -695,6 +695,16 @@ def _storage_from_foolscap_plugin(node_config, config, announcement, get_rref): raise AnnouncementNotMatched() +def _available_space_from_version(version): + if version is None: + return None + protocol_v1_version = version.get(b'http://allmydata.org/tahoe/protocols/storage/v1', BytesKeyDict()) + available_space = protocol_v1_version.get(b'available-space') + if available_space is None: + available_space = protocol_v1_version.get(b'maximum-immutable-share-size', None) + return available_space + + @implementer(IServer) class NativeStorageServer(service.MultiService): """I hold information about a storage server that we want to connect to. @@ -853,13 +863,7 @@ class NativeStorageServer(service.MultiService): def get_available_space(self): version = self.get_version() - if version is None: - return None - protocol_v1_version = version.get(b'http://allmydata.org/tahoe/protocols/storage/v1', BytesKeyDict()) - available_space = protocol_v1_version.get(b'available-space') - if available_space is None: - available_space = protocol_v1_version.get(b'maximum-immutable-share-size', None) - return available_space + return _available_space_from_version(version) def start_connecting(self, trigger_cb): self._tub = self._tub_maker(self._handler_overrides) @@ -998,15 +1002,8 @@ class HTTPNativeStorageServer(service.MultiService): pass def get_available_space(self): - # TODO refactor into shared utility with NativeStorageServer version = self.get_version() - if version is None: - return None - protocol_v1_version = version.get(b'http://allmydata.org/tahoe/protocols/storage/v1', BytesKeyDict()) - available_space = protocol_v1_version.get(b'available-space') - if available_space is None: - available_space = protocol_v1_version.get(b'maximum-immutable-share-size', None) - return available_space + return _available_space_from_version(version) def start_connecting(self, trigger_cb): pass From c3b159a3fd98e63bcc0641c4c2a5dffe5e795a15 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Aug 2022 16:12:57 -0400 Subject: [PATCH 127/289] Continue simplified sketch of HTTPNativeStorageServer. --- src/allmydata/storage_client.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index e64f63413..3bcd8e6db 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -45,7 +45,7 @@ from zope.interface import ( implementer, ) from twisted.web import http -from twisted.internet import defer +from twisted.internet import defer, reactor from twisted.application import service from twisted.plugin import ( getPlugins, @@ -934,6 +934,9 @@ class HTTPNativeStorageServer(service.MultiService): The notion of being "connected" is less meaningful for HTTP; we just poll occasionally, and if we've succeeded at last poll, we assume we're "connected". + + TODO as first pass, just to get the proof-of-concept going, we will just + assume we're always connected after an initial successful HTTP request. """ def __init__(self, server_id: bytes, announcement): @@ -944,6 +947,13 @@ class HTTPNativeStorageServer(service.MultiService): self._on_status_changed = ObserverList() furl = announcement["anonymous-storage-FURL"].encode("utf-8") self._nickname, self._permutation_seed, self._tubid, self._short_description, self._long_description = _parse_announcement(server_id, furl, announcement) + self._istorage_server = _HTTPStorageServer.from_http_client( + StorageClient.from_nurl( + announcement["anonymous-storage-NURLs"][0], reactor + ) + ) + self._connection_status = connection_status.ConnectionStatus.unstarted() + self._version = None def get_permutation_seed(self): return self._permutation_seed @@ -984,29 +994,33 @@ class HTTPNativeStorageServer(service.MultiService): return self def __repr__(self): - return "" % self.get_name() + return "" % self.get_name() def get_serverid(self): return self._server_id def get_version(self): - pass + return self._version def get_announcement(self): return self.announcement def get_connection_status(self): - pass + return self._connection_status def is_connected(self): - pass + return self._connection_status.connected def get_available_space(self): version = self.get_version() return _available_space_from_version(version) def start_connecting(self, trigger_cb): - pass + self._istorage_server.get_version().addCallback(self._got_version) + + def _got_version(self, version): + self._version = version + self._connection_status = connection_status.ConnectionStatus(True, "connected", [], time.time(), time.time()) def get_rref(self): # TODO UH @@ -1016,13 +1030,15 @@ class HTTPNativeStorageServer(service.MultiService): """ See ``IServer.get_storage_server``. """ + if self.is_connected(): + return self._istorage_server + else: + return None def stop_connecting(self): - # used when this descriptor has been superceded by another pass def try_to_connect(self): - # used when the broker wants us to hurry up pass From 94be227aaaf7adfefc3457dbc7556b10c7b5f3c4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Aug 2022 16:15:21 -0400 Subject: [PATCH 128/289] Hopefully don't actually need that. --- src/allmydata/storage_client.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 3bcd8e6db..62cc047f2 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1022,10 +1022,6 @@ class HTTPNativeStorageServer(service.MultiService): self._version = version self._connection_status = connection_status.ConnectionStatus(True, "connected", [], time.time(), time.time()) - def get_rref(self): - # TODO UH - pass - def get_storage_server(self): """ See ``IServer.get_storage_server``. From 9ad4e844e86302682dfd38e82b7e262231c21ad9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Aug 2022 16:16:17 -0400 Subject: [PATCH 129/289] Do status change notification. --- src/allmydata/storage_client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 62cc047f2..254179559 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -937,6 +937,7 @@ class HTTPNativeStorageServer(service.MultiService): TODO as first pass, just to get the proof-of-concept going, we will just assume we're always connected after an initial successful HTTP request. + Might do polling as follow-up ticket, in which case add link to that here. """ def __init__(self, server_id: bytes, announcement): @@ -1021,6 +1022,7 @@ class HTTPNativeStorageServer(service.MultiService): def _got_version(self, version): self._version = version self._connection_status = connection_status.ConnectionStatus(True, "connected", [], time.time(), time.time()) + self._on_status_changed.notify(self) def get_storage_server(self): """ From f671fb04a18c5a3f20437eac3424db1f97fc5df4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 11 Aug 2022 16:24:33 -0400 Subject: [PATCH 130/289] A lot closer to working end-to-end. --- src/allmydata/client.py | 6 +++--- src/allmydata/storage_client.py | 9 ++++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 9938ec076..769554b3d 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -825,9 +825,9 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - self.storage_nurls.update( - self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) - ) + nurls = self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) + self.storage_nurls.update(nurls) + announcement["anonymous-storage-NURLs"] = [n.to_text() for n in nurls] announcement["anonymous-storage-FURL"] = furl enabled_storage_servers = self._enable_storage_servers( diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 254179559..ec03393a1 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -39,6 +39,7 @@ from os import urandom from configparser import NoSectionError import attr +from hyperlink import DecodedURL from zope.interface import ( Attribute, Interface, @@ -264,6 +265,12 @@ class StorageFarmBroker(service.MultiService): by the given announcement. """ assert isinstance(server_id, bytes) + # TODO use constant + if "anonymous-storage-NURLs" in server["ann"]: + print("HTTTTTTTPPPPPPPPPPPPPPPPPPPP") + s = HTTPNativeStorageServer(server_id, server["ann"]) + s.on_status_changed(lambda _: self._got_connection()) + return s handler_overrides = server.get("connections", {}) s = NativeStorageServer( server_id, @@ -950,7 +957,7 @@ class HTTPNativeStorageServer(service.MultiService): self._nickname, self._permutation_seed, self._tubid, self._short_description, self._long_description = _parse_announcement(server_id, furl, announcement) self._istorage_server = _HTTPStorageServer.from_http_client( StorageClient.from_nurl( - announcement["anonymous-storage-NURLs"][0], reactor + DecodedURL.from_text(announcement["anonymous-storage-NURLs"][0]), reactor ) ) self._connection_status = connection_status.ConnectionStatus.unstarted() From 09d778c2cfb4f888831835903daf6d77205ff5c7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Aug 2022 11:13:09 -0400 Subject: [PATCH 131/289] Allow nodes to disable the HTTPS storage protocol. --- src/allmydata/client.py | 7 ++++--- src/allmydata/node.py | 5 ++++- src/allmydata/storage_client.py | 4 ++-- src/allmydata/test/common_system.py | 20 +++++++++++++------- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 769554b3d..d9fc20e92 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -825,9 +825,10 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - nurls = self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) - self.storage_nurls.update(nurls) - announcement["anonymous-storage-NURLs"] = [n.to_text() for n in nurls] + if hasattr(self.tub.negotiationClass, "add_storage_server"): + nurls = self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) + self.storage_nurls.update(nurls) + announcement["anonymous-storage-NURLs"] = [n.to_text() for n in nurls] announcement["anonymous-storage-FURL"] = furl enabled_storage_servers = self._enable_storage_servers( diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 597221e9b..0ad68f2b7 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -64,6 +64,7 @@ def _common_valid_config(): "tcp", ), "node": ( + "force_foolscap", "log_gatherer.furl", "nickname", "reveal-ip-address", @@ -709,7 +710,6 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han the new Tub via `Tub.setOption` """ tub = Tub(**kwargs) - support_foolscap_and_https(tub) for (name, value) in list(tub_options.items()): tub.setOption(name, value) handlers = default_connection_handlers.copy() @@ -907,6 +907,9 @@ def create_main_tub(config, tub_options, handler_overrides=handler_overrides, certFile=certfile, ) + if not config.get_config("node", "force_foolscap", False): + support_foolscap_and_https(tub) + if portlocation is None: log.msg("Tub is not listening") else: diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index ec03393a1..3c2c7a1b8 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -102,8 +102,8 @@ class StorageClientConfig(object): :ivar preferred_peers: An iterable of the server-ids (``bytes``) of the storage servers where share placement is preferred, in order of - decreasing preference. See the *[client]peers.preferred* - documentation for details. + decreasing preference. See the *[client]peers.preferred* documentation + for details. :ivar dict[unicode, dict[unicode, unicode]] storage_plugins: A mapping from names of ``IFoolscapStoragePlugin`` configured in *tahoe.cfg* to the diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 9851d2b91..75379bbf3 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -698,7 +698,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): return f.read().strip() @inlineCallbacks - def set_up_nodes(self, NUMCLIENTS=5): + def set_up_nodes(self, NUMCLIENTS=5, force_foolscap=False): """ Create an introducer and ``NUMCLIENTS`` client nodes pointed at it. All of the nodes are running in this process. @@ -711,6 +711,9 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): :param int NUMCLIENTS: The number of client nodes to create. + :param bool force_foolscap: Force clients to use Foolscap instead of e.g. + HTTPS when available. + :return: A ``Deferred`` that fires when the nodes have connected to each other. """ @@ -719,16 +722,16 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): self.introducer = yield self._create_introducer() self.add_service(self.introducer) self.introweb_url = self._get_introducer_web() - yield self._set_up_client_nodes() + yield self._set_up_client_nodes(force_foolscap) @inlineCallbacks - def _set_up_client_nodes(self): + def _set_up_client_nodes(self, force_foolscap): q = self.introducer self.introducer_furl = q.introducer_url self.clients = [] basedirs = [] for i in range(self.numclients): - basedirs.append((yield self._set_up_client_node(i))) + basedirs.append((yield self._set_up_client_node(i, force_foolscap))) # start clients[0], wait for it's tub to be ready (at which point it # will have registered the helper furl). @@ -761,7 +764,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): # and the helper-using webport self.helper_webish_url = self.clients[3].getServiceNamed("webish").getURL() - def _generate_config(self, which, basedir): + def _generate_config(self, which, basedir, force_foolscap=False): config = {} allclients = set(range(self.numclients)) @@ -787,6 +790,9 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): if which in feature_matrix.get((section, feature), {which}): config.setdefault(section, {})[feature] = value + if force_foolscap: + config.setdefault("node", {})["force_foolscap"] = force_foolscap + setnode = partial(setconf, config, which, "node") sethelper = partial(setconf, config, which, "helper") @@ -811,14 +817,14 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): return _render_config(config) - def _set_up_client_node(self, which): + def _set_up_client_node(self, which, force_foolscap): basedir = self.getdir("client%d" % (which,)) fileutil.make_dirs(os.path.join(basedir, "private")) if len(SYSTEM_TEST_CERTS) > (which + 1): f = open(os.path.join(basedir, "private", "node.pem"), "w") f.write(SYSTEM_TEST_CERTS[which + 1]) f.close() - config = self._generate_config(which, basedir) + config = self._generate_config(which, basedir, force_foolscap) fileutil.write(os.path.join(basedir, 'tahoe.cfg'), config) return basedir From e8609ac2df01038a7c51952149aaf4566b24e271 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Aug 2022 11:24:41 -0400 Subject: [PATCH 132/289] test_istorageserver passes with both Foolscap and HTTP again. --- src/allmydata/storage_client.py | 14 +++++----- src/allmydata/test/test_istorageserver.py | 33 ++++++++++++----------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 3c2c7a1b8..e2a48e521 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -265,9 +265,8 @@ class StorageFarmBroker(service.MultiService): by the given announcement. """ assert isinstance(server_id, bytes) - # TODO use constant - if "anonymous-storage-NURLs" in server["ann"]: - print("HTTTTTTTPPPPPPPPPPPPPPPPPPPP") + # TODO use constant for anonymous-storage-NURLs + if len(server["ann"].get("anonymous-storage-NURLs", [])) > 0: s = HTTPNativeStorageServer(server_id, server["ann"]) s.on_status_changed(lambda _: self._got_connection()) return s @@ -955,10 +954,13 @@ class HTTPNativeStorageServer(service.MultiService): self._on_status_changed = ObserverList() furl = announcement["anonymous-storage-FURL"].encode("utf-8") self._nickname, self._permutation_seed, self._tubid, self._short_description, self._long_description = _parse_announcement(server_id, furl, announcement) + nurl = DecodedURL.from_text(announcement["anonymous-storage-NURLs"][0]) + # Tests don't want persistent HTTPS pool, since that leaves a dirty + # reactor. As a reasonable hack, disabling persistent connnections for + # localhost allows us to have passing tests while not reducing + # performance for real-world usage. self._istorage_server = _HTTPStorageServer.from_http_client( - StorageClient.from_nurl( - DecodedURL.from_text(announcement["anonymous-storage-NURLs"][0]), reactor - ) + StorageClient.from_nurl(nurl, reactor, nurl.host not in ("localhost", "127.0.0.1")) ) self._connection_status = connection_status.ConnectionStatus.unstarted() self._version = None diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index 3328ea598..81025d779 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -17,7 +17,6 @@ from unittest import SkipTest from twisted.internet.defer import inlineCallbacks, returnValue, succeed from twisted.internet.task import Clock -from twisted.internet import reactor from foolscap.api import Referenceable, RemoteException # A better name for this would be IStorageClient... @@ -26,8 +25,10 @@ from allmydata.interfaces import IStorageServer from .common_system import SystemTestMixin from .common import AsyncTestCase from allmydata.storage.server import StorageServer # not a IStorageServer!! -from allmydata.storage.http_client import StorageClient -from allmydata.storage_client import _HTTPStorageServer +from allmydata.storage_client import ( + NativeStorageServer, + HTTPNativeStorageServer, +) # Use random generator with known seed, so results are reproducible if tests @@ -1021,6 +1022,10 @@ class _SharedMixin(SystemTestMixin): """Base class for Foolscap and HTTP mixins.""" SKIP_TESTS = set() # type: Set[str] + FORCE_FOOLSCAP = False + + def _get_native_server(self): + return next(iter(self.clients[0].storage_broker.get_known_servers())) def _get_istorage_server(self): raise NotImplementedError("implement in subclass") @@ -1036,7 +1041,7 @@ class _SharedMixin(SystemTestMixin): self.basedir = "test_istorageserver/" + self.id() yield SystemTestMixin.setUp(self) - yield self.set_up_nodes(1) + yield self.set_up_nodes(1, self.FORCE_FOOLSCAP) self.server = None for s in self.clients[0].services: if isinstance(s, StorageServer): @@ -1065,11 +1070,12 @@ class _SharedMixin(SystemTestMixin): class _FoolscapMixin(_SharedMixin): """Run tests on Foolscap version of ``IStorageServer``.""" - def _get_native_server(self): - return next(iter(self.clients[0].storage_broker.get_known_servers())) + FORCE_FOOLSCAP = True def _get_istorage_server(self): - client = self._get_native_server().get_storage_server() + native_server = self._get_native_server() + assert isinstance(native_server, NativeStorageServer) + client = native_server.get_storage_server() self.assertTrue(IStorageServer.providedBy(client)) return succeed(client) @@ -1077,16 +1083,13 @@ class _FoolscapMixin(_SharedMixin): class _HTTPMixin(_SharedMixin): """Run tests on the HTTP version of ``IStorageServer``.""" + FORCE_FOOLSCAP = False + def _get_istorage_server(self): - nurl = list(self.clients[0].storage_nurls)[0] - - # Create HTTP client with non-persistent connections, so we don't leak - # state across tests: - client: IStorageServer = _HTTPStorageServer.from_http_client( - StorageClient.from_nurl(nurl, reactor, persistent=False) - ) + native_server = self._get_native_server() + assert isinstance(native_server, HTTPNativeStorageServer) + client = native_server.get_storage_server() self.assertTrue(IStorageServer.providedBy(client)) - return succeed(client) From 636b8a9e2de2504b347c5662a9b828636c120743 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Aug 2022 11:28:03 -0400 Subject: [PATCH 133/289] Fix a bytes-vs-str bug. --- newsfragments/3913.minor | 0 src/allmydata/test/test_storage_web.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 newsfragments/3913.minor diff --git a/newsfragments/3913.minor b/newsfragments/3913.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/test/test_storage_web.py b/src/allmydata/test/test_storage_web.py index 5984b2892..b47c93849 100644 --- a/src/allmydata/test/test_storage_web.py +++ b/src/allmydata/test/test_storage_web.py @@ -161,7 +161,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin): html = renderSynchronously(w) s = remove_tags(html) self.failUnlessIn(b"Total buckets: 0 (the number of", s) - self.failUnless(b"Next crawl in 59 minutes" in s or "Next crawl in 60 minutes" in s, s) + self.failUnless(b"Next crawl in 59 minutes" in s or b"Next crawl in 60 minutes" in s, s) d.addCallback(_check2) return d From 3fbc4d7eea3398075a6986416a585f993549cc65 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Aug 2022 11:45:37 -0400 Subject: [PATCH 134/289] Let's make this a little clearer --- src/allmydata/scripts/tahoe_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 51be32ee3..dfdc97ea5 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -179,7 +179,7 @@ class DaemonizeTheRealService(Service, HookMixin): ) ) else: - self.stderr.write("\nUnknown error\n") + self.stderr.write("\nUnknown error, here's the traceback:\n") reason.printTraceback(self.stderr) reactor.stop() From 42e818f0a702738ceae40d033fa69d43a68e5657 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Aug 2022 11:47:08 -0400 Subject: [PATCH 135/289] Refer to appropriate attributes, hopefully. --- src/allmydata/storage_client.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index e2a48e521..87041ff8b 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -968,17 +968,18 @@ class HTTPNativeStorageServer(service.MultiService): def get_permutation_seed(self): return self._permutation_seed - def get_name(self): # keep methodname short - return self._name + def get_name(self): + return self._short_description def get_longname(self): - return self._longname + return self._long_description def get_tubid(self): return self._tubid def get_lease_seed(self): - return self._lease_seed + # Apparently this is what Foolscap version above does?! + return self._tubid def get_foolscap_write_enabler_seed(self): return self._tubid From 71b7e9b643930aa2504b1e38a131dd6def208a85 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 15 Aug 2022 10:08:50 -0400 Subject: [PATCH 136/289] Support comma-separated multi-location hints. --- src/allmydata/protocol_switch.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 158df32b5..89570436c 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -14,6 +14,8 @@ the configuration process. from __future__ import annotations +from itertools import chain + from twisted.internet.protocol import Protocol from twisted.internet.interfaces import IDelayedCall from twisted.internet.ssl import CertificateOptions @@ -94,7 +96,11 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): ) storage_nurls = set() - for location_hint in cls.tub.locationHints: + # Individual hints can be in the form + # "tcp:host:port,tcp:host:port,tcp:host:port". + for location_hint in chain.from_iterable( + hints.split(",") for hints in cls.tub.locationHints + ): if location_hint.startswith("tcp:"): _, hostname, port = location_hint.split(":") port = int(port) From c1bcfab7f80d9a1f3e7b5f2e8c39dd292daedcd9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 15 Aug 2022 11:38:02 -0400 Subject: [PATCH 137/289] Repeatedly poll status of server. --- src/allmydata/storage_client.py | 36 +++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 87041ff8b..f9a6feb7d 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -46,6 +46,7 @@ from zope.interface import ( implementer, ) from twisted.web import http +from twisted.internet.task import LoopingCall from twisted.internet import defer, reactor from twisted.application import service from twisted.plugin import ( @@ -940,10 +941,6 @@ class HTTPNativeStorageServer(service.MultiService): The notion of being "connected" is less meaningful for HTTP; we just poll occasionally, and if we've succeeded at last poll, we assume we're "connected". - - TODO as first pass, just to get the proof-of-concept going, we will just - assume we're always connected after an initial successful HTTP request. - Might do polling as follow-up ticket, in which case add link to that here. """ def __init__(self, server_id: bytes, announcement): @@ -962,8 +959,10 @@ class HTTPNativeStorageServer(service.MultiService): self._istorage_server = _HTTPStorageServer.from_http_client( StorageClient.from_nurl(nurl, reactor, nurl.host not in ("localhost", "127.0.0.1")) ) + self._connection_status = connection_status.ConnectionStatus.unstarted() self._version = None + self._last_connect_time = None def get_permutation_seed(self): return self._permutation_seed @@ -1027,11 +1026,21 @@ class HTTPNativeStorageServer(service.MultiService): return _available_space_from_version(version) def start_connecting(self, trigger_cb): - self._istorage_server.get_version().addCallback(self._got_version) + self._lc = LoopingCall(self._connect) + self._lc.start(1, True) def _got_version(self, version): + self._last_connect_time = time.time() self._version = version - self._connection_status = connection_status.ConnectionStatus(True, "connected", [], time.time(), time.time()) + self._connection_status = connection_status.ConnectionStatus( + True, "connected", [], self._last_connect_time, self._last_connect_time + ) + self._on_status_changed.notify(self) + + def _failed_to_connect(self, reason): + self._connection_status = connection_status.ConnectionStatus( + False, f"failure: {reason}", [], self._last_connect_time, self._last_connect_time + ) self._on_status_changed.notify(self) def get_storage_server(self): @@ -1044,10 +1053,21 @@ class HTTPNativeStorageServer(service.MultiService): return None def stop_connecting(self): - pass + self._lc.stop() def try_to_connect(self): - pass + self._connect() + + def _connect(self): + return self._istorage_server.get_version().addCallbacks( + self._got_version, + self._failed_to_connect + ) + + def stopService(self): + service.MultiService.stopService(self) + self._lc.stop() + self._failed_to_connect("shut down") class UnknownServerTypeError(Exception): From 2e5662aa91cacf6ad36d1ea619ea08ea799591c7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Aug 2022 13:11:06 -0400 Subject: [PATCH 138/289] Temporarily enforce requirement that allocated size matches actual size of an immutable. --- src/allmydata/storage/immutable.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index f7f5aebce..6fcca3871 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -419,14 +419,19 @@ class BucketWriter(object): self._already_written.set(True, offset, end) self.ss.add_latency("write", self._clock.seconds() - start) self.ss.count("write") + return self._is_finished() - # Return whether the whole thing has been written. See - # https://github.com/mlenzen/collections-extended/issues/169 and - # https://github.com/mlenzen/collections-extended/issues/172 for why - # it's done this way. + def _is_finished(self): + """ + Return whether the whole thing has been written. + """ return sum([mr.stop - mr.start for mr in self._already_written.ranges()]) == self._max_size def close(self): + # TODO this can't actually be enabled, because it's not backwards + # compatible. But it's useful for testing, so leaving it on until the + # branch is ready for merge. + assert self._is_finished() precondition(not self.closed) self._timeout.cancel() start = self._clock.seconds() From 556606271dbde1ccfe47ba29cb3767985286a7b4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Aug 2022 13:11:45 -0400 Subject: [PATCH 139/289] News file. --- newsfragments/3915.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3915.minor diff --git a/newsfragments/3915.minor b/newsfragments/3915.minor new file mode 100644 index 000000000..e69de29bb From d50c98a1e95b912c1cbd04d4bf932117176f5ac0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 16 Aug 2022 14:34:40 -0400 Subject: [PATCH 140/289] Calculate URI extension size upfront, instead of hand-waving with a larger value. --- src/allmydata/immutable/encode.py | 18 ++++++++++++++++++ src/allmydata/immutable/layout.py | 16 +++++++--------- src/allmydata/immutable/upload.py | 27 ++++++++++++--------------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 42fc18077..c7887b7ba 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -624,6 +624,7 @@ class Encoder(object): for k in ('crypttext_root_hash', 'crypttext_hash', ): assert k in self.uri_extension_data + self.uri_extension_data uri_extension = uri.pack_extension(self.uri_extension_data) ed = {} for k,v in self.uri_extension_data.items(): @@ -694,3 +695,20 @@ class Encoder(object): return self.uri_extension_data def get_uri_extension_hash(self): return self.uri_extension_hash + + def get_uri_extension_size(self): + """ + Calculate the size of the URI extension that gets written at the end of + immutables. + + This may be done earlier than actual encoding, so e.g. we might not + know the crypttext hashes, but that's fine for our purposes since we + only care about the length. + """ + params = self.uri_extension_data.copy() + assert params + params["crypttext_hash"] = b"\x00" * 32 + params["crypttext_root_hash"] = b"\x00" * 32 + params["share_root_hash"] = b"\x00" * 32 + uri_extension = uri.pack_extension(params) + return len(uri_extension) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 79c886237..74af09a2b 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -90,7 +90,7 @@ FORCE_V2 = False # set briefly by unit tests to make small-sized V2 shares def make_write_bucket_proxy(rref, server, data_size, block_size, num_segments, - num_share_hashes, uri_extension_size_max): + num_share_hashes, uri_extension_size): # Use layout v1 for small files, so they'll be readable by older versions # (= 2**32 or data_size >= 2**32: @@ -233,8 +232,7 @@ class WriteBucketProxy(object): def put_uri_extension(self, data): offset = self._offsets['uri_extension'] assert isinstance(data, bytes) - precondition(len(data) <= self._uri_extension_size_max, - len(data), self._uri_extension_size_max) + precondition(len(data) == self._uri_extension_size) length = struct.pack(self.fieldstruct, len(data)) return self._write(offset, length+data) diff --git a/src/allmydata/immutable/upload.py b/src/allmydata/immutable/upload.py index cb332dfdf..6b9b48f6a 100644 --- a/src/allmydata/immutable/upload.py +++ b/src/allmydata/immutable/upload.py @@ -242,31 +242,26 @@ class UploadResults(object): def get_verifycapstr(self): return self._verifycapstr -# our current uri_extension is 846 bytes for small files, a few bytes -# more for larger ones (since the filesize is encoded in decimal in a -# few places). Ask for a little bit more just in case we need it. If -# the extension changes size, we can change EXTENSION_SIZE to -# allocate a more accurate amount of space. -EXTENSION_SIZE = 1000 -# TODO: actual extensions are closer to 419 bytes, so we can probably lower -# this. def pretty_print_shnum_to_servers(s): return ', '.join([ "sh%s: %s" % (k, '+'.join([idlib.shortnodeid_b2a(x) for x in v])) for k, v in s.items() ]) + class ServerTracker(object): def __init__(self, server, sharesize, blocksize, num_segments, num_share_hashes, storage_index, - bucket_renewal_secret, bucket_cancel_secret): + bucket_renewal_secret, bucket_cancel_secret, + uri_extension_size): self._server = server self.buckets = {} # k: shareid, v: IRemoteBucketWriter self.sharesize = sharesize + self.uri_extension_size = uri_extension_size wbp = layout.make_write_bucket_proxy(None, None, sharesize, blocksize, num_segments, num_share_hashes, - EXTENSION_SIZE) + uri_extension_size) self.wbp_class = wbp.__class__ # to create more of them self.allocated_size = wbp.get_allocated_size() self.blocksize = blocksize @@ -314,7 +309,7 @@ class ServerTracker(object): self.blocksize, self.num_segments, self.num_share_hashes, - EXTENSION_SIZE) + self.uri_extension_size) b[sharenum] = bp self.buckets.update(b) return (alreadygot, set(b.keys())) @@ -487,7 +482,7 @@ class Tahoe2ServerSelector(log.PrefixingLogMixin): def get_shareholders(self, storage_broker, secret_holder, storage_index, share_size, block_size, num_segments, total_shares, needed_shares, - min_happiness): + min_happiness, uri_extension_size): """ @return: (upload_trackers, already_serverids), where upload_trackers is a set of ServerTracker instances that have agreed to hold @@ -529,7 +524,8 @@ class Tahoe2ServerSelector(log.PrefixingLogMixin): # figure out how much space to ask for wbp = layout.make_write_bucket_proxy(None, None, share_size, 0, num_segments, - num_share_hashes, EXTENSION_SIZE) + num_share_hashes, + uri_extension_size) allocated_size = wbp.get_allocated_size() # decide upon the renewal/cancel secrets, to include them in the @@ -554,7 +550,7 @@ class Tahoe2ServerSelector(log.PrefixingLogMixin): def _create_server_tracker(server, renew, cancel): return ServerTracker( server, share_size, block_size, num_segments, num_share_hashes, - storage_index, renew, cancel, + storage_index, renew, cancel, uri_extension_size ) readonly_trackers, write_trackers = self._create_trackers( @@ -1326,7 +1322,8 @@ class CHKUploader(object): d = server_selector.get_shareholders(storage_broker, secret_holder, storage_index, share_size, block_size, - num_segments, n, k, desired) + num_segments, n, k, desired, + encoder.get_uri_extension_size()) def _done(res): self._server_selection_elapsed = time.time() - server_selection_started return res From 7aa97336a0fe7b8bda7de38e2c639b142e50f494 Mon Sep 17 00:00:00 2001 From: "Fon E. Noel NFEBE" Date: Wed, 17 Aug 2022 16:03:03 +0100 Subject: [PATCH 141/289] Refactor FakeWebTest & MemoryConsumerTest classes There are base test classes namely `SyncTestCase` and `AsyncTestCase` which we would like all test classes in this code base to extend. This commit refactors two test classes to use the `SyncTestCase` with the newer assert methods. Signed-off-by: Fon E. Noel NFEBE --- newsfragments/3916.minor | 0 src/allmydata/test/test_consumer.py | 24 +++++++++++++++--------- src/allmydata/test/test_testing.py | 7 ++++--- 3 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 newsfragments/3916.minor diff --git a/newsfragments/3916.minor b/newsfragments/3916.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/test/test_consumer.py b/src/allmydata/test/test_consumer.py index a689de462..234fc2594 100644 --- a/src/allmydata/test/test_consumer.py +++ b/src/allmydata/test/test_consumer.py @@ -14,11 +14,17 @@ if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from zope.interface import implementer -from twisted.trial.unittest import TestCase from twisted.internet.interfaces import IPushProducer, IPullProducer from allmydata.util.consumer import MemoryConsumer +from .common import ( + SyncTestCase, +) +from testtools.matchers import ( + Equals, +) + @implementer(IPushProducer) @implementer(IPullProducer) @@ -50,7 +56,7 @@ class Producer(object): self.consumer.unregisterProducer() -class MemoryConsumerTests(TestCase): +class MemoryConsumerTests(SyncTestCase): """Tests for MemoryConsumer.""" def test_push_producer(self): @@ -60,14 +66,14 @@ class MemoryConsumerTests(TestCase): consumer = MemoryConsumer() producer = Producer(consumer, [b"abc", b"def", b"ghi"]) consumer.registerProducer(producer, True) - self.assertEqual(consumer.chunks, [b"abc"]) + self.assertThat(consumer.chunks, Equals([b"abc"])) producer.iterate() producer.iterate() - self.assertEqual(consumer.chunks, [b"abc", b"def", b"ghi"]) - self.assertEqual(consumer.done, False) + self.assertThat(consumer.chunks, Equals([b"abc", b"def", b"ghi"])) + self.assertFalse(consumer.done) producer.iterate() - self.assertEqual(consumer.chunks, [b"abc", b"def", b"ghi"]) - self.assertEqual(consumer.done, True) + self.assertThat(consumer.chunks, Equals([b"abc", b"def", b"ghi"])) + self.assertTrue(consumer.done) def test_pull_producer(self): """ @@ -76,8 +82,8 @@ class MemoryConsumerTests(TestCase): consumer = MemoryConsumer() producer = Producer(consumer, [b"abc", b"def", b"ghi"]) consumer.registerProducer(producer, False) - self.assertEqual(consumer.chunks, [b"abc", b"def", b"ghi"]) - self.assertEqual(consumer.done, True) + self.assertThat(consumer.chunks, Equals([b"abc", b"def", b"ghi"])) + self.assertTrue(consumer.done) # download_to_data() is effectively tested by some of the filenode tests, e.g. diff --git a/src/allmydata/test/test_testing.py b/src/allmydata/test/test_testing.py index 527b235bd..3715d1aca 100644 --- a/src/allmydata/test/test_testing.py +++ b/src/allmydata/test/test_testing.py @@ -46,9 +46,10 @@ from hypothesis.strategies import ( binary, ) -from testtools import ( - TestCase, +from .common import ( + SyncTestCase, ) + from testtools.matchers import ( Always, Equals, @@ -61,7 +62,7 @@ from testtools.twistedsupport import ( ) -class FakeWebTest(TestCase): +class FakeWebTest(SyncTestCase): """ Test the WebUI verified-fakes infrastucture """ From c9084a2a45fb16cc90d4f6043017cbc57ba463a9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 12:49:06 -0400 Subject: [PATCH 142/289] Disable assertion we can't, sadly, enable. --- src/allmydata/storage/immutable.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index 6fcca3871..a02fd3bb2 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -428,10 +428,9 @@ class BucketWriter(object): return sum([mr.stop - mr.start for mr in self._already_written.ranges()]) == self._max_size def close(self): - # TODO this can't actually be enabled, because it's not backwards - # compatible. But it's useful for testing, so leaving it on until the - # branch is ready for merge. - assert self._is_finished() + # This can't actually be enabled, because it's not backwards compatible + # with old Foolscap clients. + # assert self._is_finished() precondition(not self.closed) self._timeout.cancel() start = self._clock.seconds() From 9d03c476d196c78d7ca5ac36e57c3f1b6c1434b0 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 12:49:45 -0400 Subject: [PATCH 143/289] Make sure we write all the bytes we say we're sending. --- src/allmydata/immutable/layout.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 74af09a2b..30ab985a8 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -118,6 +118,7 @@ class WriteBucketProxy(object): self._data_size = data_size self._block_size = block_size self._num_segments = num_segments + self._written_bytes = 0 effective_segments = mathutil.next_power_of_k(num_segments,2) self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE @@ -194,6 +195,11 @@ class WriteBucketProxy(object): return self._write(offset, data) def put_crypttext_hashes(self, hashes): + # plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and + # so is not explicitly written, but we need to write everything, so + # fill it in with nulls. + self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) + offset = self._offsets['crypttext_hash_tree'] assert isinstance(hashes, list) data = b"".join(hashes) @@ -242,11 +248,12 @@ class WriteBucketProxy(object): # would reduce the foolscap CPU overhead per share, but wouldn't # reduce the number of round trips, so it might not be worth the # effort. - + self._written_bytes += len(data) return self._pipeline.add(len(data), self._rref.callRemote, "write", offset, data) def close(self): + assert self._written_bytes == self.get_allocated_size(), f"{self._written_bytes} != {self.get_allocated_size()}" d = self._pipeline.add(0, self._rref.callRemote, "close") d.addCallback(lambda ign: self._pipeline.flush()) return d From 3464637bbb1de4a739d87a14d95b0a300c326063 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 12:54:26 -0400 Subject: [PATCH 144/289] Fix unit tests. --- src/allmydata/test/test_storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index c3f2a35e1..134609f81 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -463,7 +463,7 @@ class BucketProxy(unittest.TestCase): block_size=10, num_segments=5, num_share_hashes=3, - uri_extension_size_max=500) + uri_extension_size=500) self.failUnless(interfaces.IStorageBucketWriter.providedBy(bp), bp) def _do_test_readwrite(self, name, header_size, wbp_class, rbp_class): @@ -494,7 +494,7 @@ class BucketProxy(unittest.TestCase): block_size=25, num_segments=4, num_share_hashes=3, - uri_extension_size_max=len(uri_extension)) + uri_extension_size=len(uri_extension)) d = bp.put_header() d.addCallback(lambda res: bp.put_block(0, b"a"*25)) From cd81e5a01c82796fd3d69c93fb7f088ad6bf2a3b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 13:13:22 -0400 Subject: [PATCH 145/289] Hint for future debugging. --- src/allmydata/storage/immutable.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/immutable.py b/src/allmydata/storage/immutable.py index a02fd3bb2..0893513ae 100644 --- a/src/allmydata/storage/immutable.py +++ b/src/allmydata/storage/immutable.py @@ -397,7 +397,9 @@ class BucketWriter(object): """ Write data at given offset, return whether the upload is complete. """ - # Delay the timeout, since we received data: + # Delay the timeout, since we received data; if we get an + # AlreadyCancelled error, that means there's a bug in the client and + # write() was called after close(). self._timeout.reset(30 * 60) start = self._clock.seconds() precondition(not self.closed) From 92662d802cf0e43a5a5f684faba455de7a6cde53 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 13:15:13 -0400 Subject: [PATCH 146/289] Don't drop a Deferred on the ground. --- src/allmydata/immutable/layout.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 30ab985a8..de390bda9 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -198,8 +198,11 @@ class WriteBucketProxy(object): # plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and # so is not explicitly written, but we need to write everything, so # fill it in with nulls. - self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) + d = self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) + d.addCallback(lambda _: self._really_put_crypttext_hashes(hashes)) + return d + def _really_put_crypttext_hashes(self, hashes): offset = self._offsets['crypttext_hash_tree'] assert isinstance(hashes, list) data = b"".join(hashes) From bdb4aac0de1bd03fb8625e156135d4f0964e478c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 17 Aug 2022 13:15:27 -0400 Subject: [PATCH 147/289] Pass in the missing argument. --- src/allmydata/test/test_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 8d5435e88..18192de6c 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -983,7 +983,7 @@ class EncodingParameters(GridTestMixin, unittest.TestCase, SetDEPMixin, num_segments = encoder.get_param("num_segments") d = selector.get_shareholders(broker, sh, storage_index, share_size, block_size, num_segments, - 10, 3, 4) + 10, 3, 4, encoder.get_uri_extension_size()) def _have_shareholders(upload_trackers_and_already_servers): (upload_trackers, already_servers) = upload_trackers_and_already_servers assert servers_to_break <= len(upload_trackers) From 869b15803c506256004fd47d6c72d5a8f61e0267 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 6 Sep 2022 08:46:09 -0400 Subject: [PATCH 148/289] assorted fixes --- docs/proposed/http-storage-node-protocol.rst | 52 +++++++++++--------- docs/specifications/url.rst | 2 + 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 3dac376ff..8fe855be3 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -30,12 +30,12 @@ Glossary introducer a Tahoe-LAFS process at a known location configured to re-publish announcements about the location of storage servers - fURL + `fURL `_ a self-authenticating URL-like string which can be used to locate a remote object using the Foolscap protocol (the storage service is an example of such an object) - NURL - a self-authenticating URL-like string almost exactly like a NURL but without being tied to Foolscap + `NURL `_ + a self-authenticating URL-like string almost exactly like a fURL but without being tied to Foolscap swissnum a short random string which is part of a fURL/NURL and which acts as a shared secret to authorize clients to use a storage service @@ -580,24 +580,6 @@ Responses: the response is ``CONFLICT``. At this point the only thing to do is abort the upload and start from scratch (see below). -``PUT /v1/immutable/:storage_index/:share_number/abort`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -This cancels an *in-progress* upload. - -The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret:: - - X-Tahoe-Authorization: upload-secret - -The response code: - -* When the upload is still in progress and therefore the abort has succeeded, - the response is ``OK``. - Future uploads can start from scratch with no pre-existing upload state stored on the server. -* If the uploaded has already finished, the response is 405 (Method Not Allowed) - and no change is made. - - Discussion `````````` @@ -616,6 +598,24 @@ From RFC 7231:: PATCH method defined in [RFC5789]). +``PUT /v1/immutable/:storage_index/:share_number/abort`` +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +This cancels an *in-progress* upload. + +The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret:: + + X-Tahoe-Authorization: upload-secret + +The response code: + +* When the upload is still in progress and therefore the abort has succeeded, + the response is ``OK``. + Future uploads can start from scratch with no pre-existing upload state stored on the server. +* If the uploaded has already finished, the response is 405 (Method Not Allowed) + and no change is made. + + ``POST /v1/immutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -625,7 +625,7 @@ corruption. It also includes potentially important details about the share. For example:: - {"reason": u"expected hash abcd, got hash efgh"} + {"reason": "expected hash abcd, got hash efgh"} .. share-type, storage-index, and share-number are inferred from the URL @@ -799,6 +799,7 @@ Immutable Data 200 OK + { "required": [ {"begin": 16, "end": 48 } ] } PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum @@ -807,6 +808,7 @@ Immutable Data 200 OK + { "required": [ {"begin": 32, "end": 48 } ] } PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum @@ -823,6 +825,7 @@ Immutable Data Range: bytes=0-47 200 OK + Content-Range: bytes 0-47/48 #. Renew the lease on all immutable shares in bucket ``AAAAAAAAAAAAAAAA``:: @@ -906,9 +909,12 @@ otherwise it will read a byte which won't match `b""`:: #. Download the contents of share number ``3``:: - GET /v1/mutable/BBBBBBBBBBBBBBBB?share=3&offset=0&size=10 + GET /v1/mutable/BBBBBBBBBBBBBBBB?share=3 Authorization: Tahoe-LAFS nurl-swissnum + Range: bytes=0-16 + 200 OK + Content-Range: bytes 0-15/16 #. Renew the lease on previously uploaded mutable share in slot ``BBBBBBBBBBBBBBBB``:: diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 31fb05fad..421ac57f7 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -10,6 +10,8 @@ The intended audience for this document is Tahoe-LAFS maintainers and other deve Background ---------- +.. _fURLs: + Tahoe-LAFS first used Foolscap_ for network communication. Foolscap connection setup takes as an input a Foolscap URL or a *fURL*. A fURL includes three components: From 3b9eea5b8f1278bad158df336a9f617f8dee7895 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 6 Sep 2022 08:46:52 -0400 Subject: [PATCH 149/289] news fragment --- newsfragments/3922.documentation | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3922.documentation diff --git a/newsfragments/3922.documentation b/newsfragments/3922.documentation new file mode 100644 index 000000000..d0232dd02 --- /dev/null +++ b/newsfragments/3922.documentation @@ -0,0 +1 @@ +Several minor errors in the Great Black Swamp proposed specification document have been fixed. \ No newline at end of file From 9975fddd88d263a58e146b91b2c22b5b53500a85 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 8 Sep 2022 13:42:19 -0400 Subject: [PATCH 150/289] Get rid of garbage. --- src/allmydata/immutable/encode.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index c7887b7ba..34a9c2472 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -624,7 +624,6 @@ class Encoder(object): for k in ('crypttext_root_hash', 'crypttext_hash', ): assert k in self.uri_extension_data - self.uri_extension_data uri_extension = uri.pack_extension(self.uri_extension_data) ed = {} for k,v in self.uri_extension_data.items(): From c82bb5f21c90e293c1507c71aa68fb4768b3abb6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 8 Sep 2022 13:44:22 -0400 Subject: [PATCH 151/289] Use a more meaningful constant. --- src/allmydata/immutable/encode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 34a9c2472..3c4440486 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -706,8 +706,8 @@ class Encoder(object): """ params = self.uri_extension_data.copy() assert params - params["crypttext_hash"] = b"\x00" * 32 - params["crypttext_root_hash"] = b"\x00" * 32 - params["share_root_hash"] = b"\x00" * 32 + params["crypttext_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE + params["crypttext_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE + params["share_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE uri_extension = uri.pack_extension(params) return len(uri_extension) From 6310774b8267d2deb0620c1766bc7f93694a3803 Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Thu, 8 Sep 2022 17:50:58 +0000 Subject: [PATCH 152/289] Add documentation on OpenMetrics statistics endpoint. references ticket:3786 --- docs/stats.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/stats.rst b/docs/stats.rst index 50642d816..c7d69e0d2 100644 --- a/docs/stats.rst +++ b/docs/stats.rst @@ -264,3 +264,18 @@ the "tahoe-conf" file for notes about configuration and installing these plugins into a Munin environment. .. _Munin: http://munin-monitoring.org/ + + +Scraping Stats Values in OpenMetrics Format +=========================================== + +Time Series DataBase (TSDB) software like Prometheus_ and VictoriaMetrics_ can +parse statistics from the e.g. http://localhost:3456/statistics?t=openmetrics +URL in OpenMetrics_ format. Software like Grafana_ can then be used to graph +and alert on these numbers. You can find a pre-configured dashboard for +Grafana at https://grafana.com/grafana/dashboards/16894-tahoe-lafs/. + +.. _OpenMetrics: https://openmetrics.io/ +.. _Prometheus: https://prometheus.io/ +.. _VictoriaMetrics: https://victoriametrics.com/ +.. _Grafana: https://grafana.com/ From ae21ab74a2afb8a0db234e76e2d09e76df0f5958 Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Thu, 8 Sep 2022 18:05:59 +0000 Subject: [PATCH 153/289] Add newsfragment for the added documentation. --- newsfragments/3786.minor | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3786.minor diff --git a/newsfragments/3786.minor b/newsfragments/3786.minor new file mode 100644 index 000000000..ecd1a2c4e --- /dev/null +++ b/newsfragments/3786.minor @@ -0,0 +1 @@ +Added re-structured text documentation for the OpenMetrics format statistics endpoint. From 373a5328293693612123e7e47be4c01e5de3746b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 09:36:56 -0400 Subject: [PATCH 154/289] Detect corrupted UEB length more consistently. --- src/allmydata/immutable/layout.py | 8 ++++---- src/allmydata/test/test_repairer.py | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index de390bda9..6679fc94c 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -495,10 +495,10 @@ class ReadBucketProxy(object): if len(data) != self._fieldsize: raise LayoutInvalid("not enough bytes to encode URI length -- should be %d bytes long, not %d " % (self._fieldsize, len(data),)) length = struct.unpack(self._fieldstruct, data)[0] - if length >= 2**31: - # URI extension blocks are around 419 bytes long, so this - # must be corrupted. Anyway, the foolscap interface schema - # for "read" will not allow >= 2**31 bytes length. + if length >= 2000: + # URI extension blocks are around 419 bytes long; in previous + # versions of the code 1000 was used as a default catchall. So + # 2000 or more must be corrupted. raise RidiculouslyLargeURIExtensionBlock(length) return self._read(offset+self._fieldsize, length) diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index f9b93af72..8545b1cf4 100644 --- a/src/allmydata/test/test_repairer.py +++ b/src/allmydata/test/test_repairer.py @@ -251,6 +251,12 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin): self.judge_invisible_corruption) def test_corrupt_ueb(self): + # Note that in some rare situations this might fail, specifically if + # the length of the UEB is corrupted to be a value that is bigger than + # the size but less than 2000, it might not get caught... But that's + # mostly because in that case it doesn't meaningfully corrupt it. See + # _get_uri_extension_the_old_way() in layout.py for where the 2000 + # number comes from. self.basedir = "repairer/Verifier/corrupt_ueb" return self._help_test_verify(common._corrupt_uri_extension, self.judge_invisible_corruption) From 8d5f08771a4f73f09611500c39627334f7273fc9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 09:45:46 -0400 Subject: [PATCH 155/289] Minimal check on parameters' contents. --- src/allmydata/immutable/encode.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 3c4440486..874492785 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -705,9 +705,13 @@ class Encoder(object): only care about the length. """ params = self.uri_extension_data.copy() - assert params params["crypttext_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE params["crypttext_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE params["share_root_hash"] = b"\x00" * hashutil.CRYPTO_VAL_SIZE + assert params.keys() == { + "codec_name", "codec_params", "size", "segment_size", "num_segments", + "needed_shares", "total_shares", "tail_codec_params", + "crypttext_hash", "crypttext_root_hash", "share_root_hash" + }, params.keys() uri_extension = uri.pack_extension(params) return len(uri_extension) From 00972ba3c6d8b9b83eb8c069ec1c9fa5768aaed3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 09:59:36 -0400 Subject: [PATCH 156/289] Match latest GBS spec. --- docs/specifications/url.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 31fb05fad..a9e37a0ec 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -103,11 +103,8 @@ Version 1 The hash component of a version 1 NURL differs in three ways from the prior version. -1. The hash function used is SHA3-224 instead of SHA1. - The security of SHA1 `continues to be eroded`_. - Contrariwise SHA3 is currently the most recent addition to the SHA family by NIST. - The 224 bit instance is chosen to keep the output short and because it offers greater collision resistance than SHA1 was thought to offer even at its inception - (prior to security research showing actual collision resistance is lower). +1. The hash function used is SHA-256, to match RFC 7469. + The security of SHA1 `continues to be eroded`_; Latacora `SHA-2`_. 2. The hash is computed over the certificate's SPKI instead of the whole certificate. This allows certificate re-generation so long as the public key remains the same. This is useful to allow contact information to be updated or extension of validity period. @@ -140,7 +137,8 @@ Examples * ``pb://azEu8vlRpnEeYm0DySQDeNY3Z2iJXHC_bsbaAw@localhost:47877/64i4aokv4ej#v=1`` .. _`continues to be eroded`: https://en.wikipedia.org/wiki/SHA-1#Cryptanalysis_and_validation -.. _`explored by the web community`: https://www.imperialviolet.org/2011/05/04/pinning.html +.. _`SHA-2`: https://latacora.micro.blog/2018/04/03/cryptographic-right-answers.html +.. _`explored by the web community`: https://www.rfc-editor.org/rfc/rfc7469 .. _Foolscap: https://github.com/warner/foolscap .. [1] ``foolscap.furl.decode_furl`` is taken as the canonical definition of the syntax of a fURL. From 1759eacee3c4cbdb72d956bf1df2c35f7fc435bb Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:09:25 -0400 Subject: [PATCH 157/289] No need to include NURL. --- docs/proposed/http-storage-node-protocol.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 3dac376ff..b601a785b 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -409,8 +409,7 @@ For example:: "tolerates-immutable-read-overrun": true, "delete-mutable-shares-with-zero-length-writev": true, "fills-holes-with-zero-bytes": true, - "prevents-read-past-end-of-share-data": true, - "gbs-anonymous-storage-url": "pb://...#v=1" + "prevents-read-past-end-of-share-data": true }, "application-version": "1.13.0" } From 0d97847ef5c4625d972dd92e29f9a8187f97a6b2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:09:50 -0400 Subject: [PATCH 158/289] News file. --- newsfragments/3904.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3904.minor diff --git a/newsfragments/3904.minor b/newsfragments/3904.minor new file mode 100644 index 000000000..e69de29bb From b1aa93e02234bae93efac860a0078d5a1c089d2a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:34:59 -0400 Subject: [PATCH 159/289] Switch prefix. --- docs/proposed/http-storage-node-protocol.rst | 48 ++++++++++---------- src/allmydata/storage/http_client.py | 28 ++++++++---- src/allmydata/storage/http_server.py | 27 ++++++----- src/allmydata/test/test_storage_http.py | 8 ++-- 4 files changed, 61 insertions(+), 50 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index b601a785b..ec800367c 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -395,7 +395,7 @@ Encoding General ~~~~~~~ -``GET /v1/version`` +``GET /storage/v1/version`` !!!!!!!!!!!!!!!!!!! Retrieve information about the version of the storage server. @@ -414,7 +414,7 @@ For example:: "application-version": "1.13.0" } -``PUT /v1/lease/:storage_index`` +``PUT /storage/v1/lease/:storage_index`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Either renew or create a new lease on the bucket addressed by ``storage_index``. @@ -467,7 +467,7 @@ Immutable Writing ~~~~~~~ -``POST /v1/immutable/:storage_index`` +``POST /storage/v1/immutable/:storage_index`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Initialize an immutable storage index with some buckets. @@ -503,7 +503,7 @@ Handling repeat calls: Discussion `````````` -We considered making this ``POST /v1/immutable`` instead. +We considered making this ``POST /storage/v1/immutable`` instead. The motivation was to keep *storage index* out of the request URL. Request URLs have an elevated chance of being logged by something. We were concerned that having the *storage index* logged may increase some risks. @@ -538,7 +538,7 @@ Rejected designs for upload secrets: it must contain randomness. Randomness means there is no need to have a secret per share, since adding share-specific content to randomness doesn't actually make the secret any better. -``PATCH /v1/immutable/:storage_index/:share_number`` +``PATCH /storage/v1/immutable/:storage_index/:share_number`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Write data for the indicated share. @@ -579,7 +579,7 @@ Responses: the response is ``CONFLICT``. At this point the only thing to do is abort the upload and start from scratch (see below). -``PUT /v1/immutable/:storage_index/:share_number/abort`` +``PUT /storage/v1/immutable/:storage_index/:share_number/abort`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! This cancels an *in-progress* upload. @@ -615,7 +615,7 @@ From RFC 7231:: PATCH method defined in [RFC5789]). -``POST /v1/immutable/:storage_index/:share_number/corrupt`` +``POST /storage/v1/immutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. The @@ -634,7 +634,7 @@ couldn't be found. Reading ~~~~~~~ -``GET /v1/immutable/:storage_index/shares`` +``GET /storage/v1/immutable/:storage_index/shares`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a list (semantically, a set) indicating all shares available for the @@ -644,7 +644,7 @@ indicated storage index. For example:: An unknown storage index results in an empty list. -``GET /v1/immutable/:storage_index/:share_number`` +``GET /storage/v1/immutable/:storage_index/:share_number`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Read a contiguous sequence of bytes from one share in one bucket. @@ -685,7 +685,7 @@ Mutable Writing ~~~~~~~ -``POST /v1/mutable/:storage_index/read-test-write`` +``POST /storage/v1/mutable/:storage_index/read-test-write`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! General purpose read-test-and-write operation for mutable storage indexes. @@ -741,7 +741,7 @@ As a result, if there is no data at all, an empty bytestring is returned no matt Reading ~~~~~~~ -``GET /v1/mutable/:storage_index/shares`` +``GET /storage/v1/mutable/:storage_index/shares`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a set indicating all shares available for the indicated storage index. @@ -749,10 +749,10 @@ For example (this is shown as list, since it will be list for JSON, but will be [1, 5] -``GET /v1/mutable/:storage_index/:share_number`` +``GET /storage/v1/mutable/:storage_index/:share_number`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -Read data from the indicated mutable shares, just like ``GET /v1/immutable/:storage_index`` +Read data from the indicated mutable shares, just like ``GET /storage/v1/immutable/:storage_index`` The ``Range`` header may be used to request exactly one ``bytes`` range, in which case the response code will be 206 (partial content). Interpretation and response behavior is as specified in RFC 7233 § 4.1. @@ -764,7 +764,7 @@ The resulting ``Content-Range`` header will be consistent with the returned data If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. -``POST /v1/mutable/:storage_index/:share_number/corrupt`` +``POST /storage/v1/mutable/:storage_index/:share_number/corrupt`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. @@ -778,7 +778,7 @@ Immutable Data 1. Create a bucket for storage index ``AAAAAAAAAAAAAAAA`` to hold two immutable shares, discovering that share ``1`` was already uploaded:: - POST /v1/immutable/AAAAAAAAAAAAAAAA + POST /storage/v1/immutable/AAAAAAAAAAAAAAAA Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: lease-renew-secret efgh X-Tahoe-Authorization: lease-cancel-secret jjkl @@ -791,7 +791,7 @@ Immutable Data #. Upload the content for immutable share ``7``:: - PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 + PATCH /storage/v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum Content-Range: bytes 0-15/48 X-Tahoe-Authorization: upload-secret xyzf @@ -799,7 +799,7 @@ Immutable Data 200 OK - PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 + PATCH /storage/v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum Content-Range: bytes 16-31/48 X-Tahoe-Authorization: upload-secret xyzf @@ -807,7 +807,7 @@ Immutable Data 200 OK - PATCH /v1/immutable/AAAAAAAAAAAAAAAA/7 + PATCH /storage/v1/immutable/AAAAAAAAAAAAAAAA/7 Authorization: Tahoe-LAFS nurl-swissnum Content-Range: bytes 32-47/48 X-Tahoe-Authorization: upload-secret xyzf @@ -817,7 +817,7 @@ Immutable Data #. Download the content of the previously uploaded immutable share ``7``:: - GET /v1/immutable/AAAAAAAAAAAAAAAA?share=7 + GET /storage/v1/immutable/AAAAAAAAAAAAAAAA?share=7 Authorization: Tahoe-LAFS nurl-swissnum Range: bytes=0-47 @@ -826,7 +826,7 @@ Immutable Data #. Renew the lease on all immutable shares in bucket ``AAAAAAAAAAAAAAAA``:: - PUT /v1/lease/AAAAAAAAAAAAAAAA + PUT /storage/v1/lease/AAAAAAAAAAAAAAAA Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: lease-cancel-secret jjkl X-Tahoe-Authorization: lease-renew-secret efgh @@ -841,7 +841,7 @@ The special test vector of size 1 but empty bytes will only pass if there is no existing share, otherwise it will read a byte which won't match `b""`:: - POST /v1/mutable/BBBBBBBBBBBBBBBB/read-test-write + POST /storage/v1/mutable/BBBBBBBBBBBBBBBB/read-test-write Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: write-enabler abcd X-Tahoe-Authorization: lease-cancel-secret efgh @@ -873,7 +873,7 @@ otherwise it will read a byte which won't match `b""`:: #. Safely rewrite the contents of a known version of mutable share number ``3`` (or fail):: - POST /v1/mutable/BBBBBBBBBBBBBBBB/read-test-write + POST /storage/v1/mutable/BBBBBBBBBBBBBBBB/read-test-write Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: write-enabler abcd X-Tahoe-Authorization: lease-cancel-secret efgh @@ -905,14 +905,14 @@ otherwise it will read a byte which won't match `b""`:: #. Download the contents of share number ``3``:: - GET /v1/mutable/BBBBBBBBBBBBBBBB?share=3&offset=0&size=10 + GET /storage/v1/mutable/BBBBBBBBBBBBBBBB?share=3&offset=0&size=10 Authorization: Tahoe-LAFS nurl-swissnum #. Renew the lease on previously uploaded mutable share in slot ``BBBBBBBBBBBBBBBB``:: - PUT /v1/lease/BBBBBBBBBBBBBBBB + PUT /storage/v1/lease/BBBBBBBBBBBBBBBB Authorization: Tahoe-LAFS nurl-swissnum X-Tahoe-Authorization: lease-cancel-secret efgh X-Tahoe-Authorization: lease-renew-secret ijkl diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index a2dc5379f..16d426dda 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -392,7 +392,7 @@ class StorageClientGeneral(object): """ Return the version metadata for the server. """ - url = self._client.relative_url("/v1/version") + url = self._client.relative_url("/storage/v1/version") response = yield self._client.request("GET", url) decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"]) returnValue(decoded_response) @@ -408,7 +408,7 @@ class StorageClientGeneral(object): Otherwise a new lease is added. """ url = self._client.relative_url( - "/v1/lease/{}".format(_encode_si(storage_index)) + "/storage/v1/lease/{}".format(_encode_si(storage_index)) ) response = yield self._client.request( "PUT", @@ -457,7 +457,9 @@ def read_share_chunk( always provided by the current callers. """ url = client.relative_url( - "/v1/{}/{}/{}".format(share_type, _encode_si(storage_index), share_number) + "/storage/v1/{}/{}/{}".format( + share_type, _encode_si(storage_index), share_number + ) ) response = yield client.request( "GET", @@ -518,7 +520,7 @@ async def advise_corrupt_share( ): assert isinstance(reason, str) url = client.relative_url( - "/v1/{}/{}/{}/corrupt".format( + "/storage/v1/{}/{}/{}/corrupt".format( share_type, _encode_si(storage_index), share_number ) ) @@ -563,7 +565,9 @@ class StorageClientImmutables(object): Result fires when creating the storage index succeeded, if creating the storage index failed the result will fire with an exception. """ - url = self._client.relative_url("/v1/immutable/" + _encode_si(storage_index)) + url = self._client.relative_url( + "/storage/v1/immutable/" + _encode_si(storage_index) + ) message = {"share-numbers": share_numbers, "allocated-size": allocated_size} response = yield self._client.request( @@ -588,7 +592,9 @@ class StorageClientImmutables(object): ) -> Deferred[None]: """Abort the upload.""" url = self._client.relative_url( - "/v1/immutable/{}/{}/abort".format(_encode_si(storage_index), share_number) + "/storage/v1/immutable/{}/{}/abort".format( + _encode_si(storage_index), share_number + ) ) response = yield self._client.request( "PUT", @@ -620,7 +626,9 @@ class StorageClientImmutables(object): been uploaded. """ url = self._client.relative_url( - "/v1/immutable/{}/{}".format(_encode_si(storage_index), share_number) + "/storage/v1/immutable/{}/{}".format( + _encode_si(storage_index), share_number + ) ) response = yield self._client.request( "PATCH", @@ -668,7 +676,7 @@ class StorageClientImmutables(object): Return the set of shares for a given storage index. """ url = self._client.relative_url( - "/v1/immutable/{}/shares".format(_encode_si(storage_index)) + "/storage/v1/immutable/{}/shares".format(_encode_si(storage_index)) ) response = yield self._client.request( "GET", @@ -774,7 +782,7 @@ class StorageClientMutables: are done and if they are valid the writes are done. """ url = self._client.relative_url( - "/v1/mutable/{}/read-test-write".format(_encode_si(storage_index)) + "/storage/v1/mutable/{}/read-test-write".format(_encode_si(storage_index)) ) message = { "test-write-vectors": { @@ -817,7 +825,7 @@ class StorageClientMutables: List the share numbers for a given storage index. """ url = self._client.relative_url( - "/v1/mutable/{}/shares".format(_encode_si(storage_index)) + "/storage/v1/mutable/{}/shares".format(_encode_si(storage_index)) ) response = await self._client.request("GET", url) if response.code == http.OK: diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 68d0740b1..2e9b57b13 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -545,7 +545,7 @@ class HTTPServer(object): ##### Generic APIs ##### - @_authorized_route(_app, set(), "/v1/version", methods=["GET"]) + @_authorized_route(_app, set(), "/storage/v1/version", methods=["GET"]) def version(self, request, authorization): """Return version information.""" return self._send_encoded(request, self._storage_server.get_version()) @@ -555,7 +555,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.LEASE_RENEW, Secrets.LEASE_CANCEL, Secrets.UPLOAD}, - "/v1/immutable/", + "/storage/v1/immutable/", methods=["POST"], ) def allocate_buckets(self, request, authorization, storage_index): @@ -591,7 +591,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.UPLOAD}, - "/v1/immutable///abort", + "/storage/v1/immutable///abort", methods=["PUT"], ) def abort_share_upload(self, request, authorization, storage_index, share_number): @@ -622,7 +622,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.UPLOAD}, - "/v1/immutable//", + "/storage/v1/immutable//", methods=["PATCH"], ) def write_share_data(self, request, authorization, storage_index, share_number): @@ -665,7 +665,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/immutable//shares", + "/storage/v1/immutable//shares", methods=["GET"], ) def list_shares(self, request, authorization, storage_index): @@ -678,7 +678,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/immutable//", + "/storage/v1/immutable//", methods=["GET"], ) def read_share_chunk(self, request, authorization, storage_index, share_number): @@ -694,7 +694,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.LEASE_RENEW, Secrets.LEASE_CANCEL}, - "/v1/lease/", + "/storage/v1/lease/", methods=["PUT"], ) def add_or_renew_lease(self, request, authorization, storage_index): @@ -715,7 +715,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/immutable///corrupt", + "/storage/v1/immutable///corrupt", methods=["POST"], ) def advise_corrupt_share_immutable( @@ -736,7 +736,7 @@ class HTTPServer(object): @_authorized_route( _app, {Secrets.LEASE_RENEW, Secrets.LEASE_CANCEL, Secrets.WRITE_ENABLER}, - "/v1/mutable//read-test-write", + "/storage/v1/mutable//read-test-write", methods=["POST"], ) def mutable_read_test_write(self, request, authorization, storage_index): @@ -771,7 +771,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/mutable//", + "/storage/v1/mutable//", methods=["GET"], ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): @@ -795,7 +795,10 @@ class HTTPServer(object): return read_range(request, read_data, share_length) @_authorized_route( - _app, set(), "/v1/mutable//shares", methods=["GET"] + _app, + set(), + "/storage/v1/mutable//shares", + methods=["GET"], ) def enumerate_mutable_shares(self, request, authorization, storage_index): """List mutable shares for a storage index.""" @@ -805,7 +808,7 @@ class HTTPServer(object): @_authorized_route( _app, set(), - "/v1/mutable///corrupt", + "/storage/v1/mutable///corrupt", methods=["POST"], ) def advise_corrupt_share_mutable( diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 419052282..4a912cf6c 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -255,7 +255,7 @@ class TestApp(object): else: return "BAD: {}".format(authorization) - @_authorized_route(_app, set(), "/v1/version", methods=["GET"]) + @_authorized_route(_app, set(), "/storage/v1/version", methods=["GET"]) def bad_version(self, request, authorization): """Return version result that violates the expected schema.""" request.setHeader("content-type", CBOR_MIME_TYPE) @@ -534,7 +534,7 @@ class GenericHTTPAPITests(SyncTestCase): lease_secret = urandom(32) storage_index = urandom(16) url = self.http.client.relative_url( - "/v1/immutable/" + _encode_si(storage_index) + "/storage/v1/immutable/" + _encode_si(storage_index) ) message = {"bad-message": "missing expected keys"} @@ -1418,7 +1418,7 @@ class SharedImmutableMutableTestsMixin: self.http.client.request( "GET", self.http.client.relative_url( - "/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + "/storage/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) ), ) ) @@ -1441,7 +1441,7 @@ class SharedImmutableMutableTestsMixin: self.http.client.request( "GET", self.http.client.relative_url( - "/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + "/storage/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) ), headers=headers, ) From f5b374a7a2ad95232e8cddca3d9d334f4f4b6986 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 10:56:11 -0400 Subject: [PATCH 160/289] Make sphinx happy. --- docs/proposed/http-storage-node-protocol.rst | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index ec800367c..a44408e6c 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -396,7 +396,7 @@ General ~~~~~~~ ``GET /storage/v1/version`` -!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve information about the version of the storage server. Information is returned as an encoded mapping. @@ -415,7 +415,7 @@ For example:: } ``PUT /storage/v1/lease/:storage_index`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Either renew or create a new lease on the bucket addressed by ``storage_index``. @@ -468,7 +468,7 @@ Writing ~~~~~~~ ``POST /storage/v1/immutable/:storage_index`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Initialize an immutable storage index with some buckets. The buckets may have share data written to them once. @@ -539,7 +539,7 @@ Rejected designs for upload secrets: Randomness means there is no need to have a secret per share, since adding share-specific content to randomness doesn't actually make the secret any better. ``PATCH /storage/v1/immutable/:storage_index/:share_number`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Write data for the indicated share. The share number must belong to the storage index. @@ -580,7 +580,7 @@ Responses: At this point the only thing to do is abort the upload and start from scratch (see below). ``PUT /storage/v1/immutable/:storage_index/:share_number/abort`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! This cancels an *in-progress* upload. @@ -616,7 +616,7 @@ From RFC 7231:: ``POST /storage/v1/immutable/:storage_index/:share_number/corrupt`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. The request body includes an human-meaningful text string with details about the @@ -635,7 +635,7 @@ Reading ~~~~~~~ ``GET /storage/v1/immutable/:storage_index/shares`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a list (semantically, a set) indicating all shares available for the indicated storage index. For example:: @@ -645,7 +645,7 @@ indicated storage index. For example:: An unknown storage index results in an empty list. ``GET /storage/v1/immutable/:storage_index/:share_number`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Read a contiguous sequence of bytes from one share in one bucket. The response body is the raw share data (i.e., ``application/octet-stream``). @@ -686,7 +686,7 @@ Writing ~~~~~~~ ``POST /storage/v1/mutable/:storage_index/read-test-write`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! General purpose read-test-and-write operation for mutable storage indexes. A mutable storage index is also called a "slot" @@ -742,7 +742,7 @@ Reading ~~~~~~~ ``GET /storage/v1/mutable/:storage_index/shares`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a set indicating all shares available for the indicated storage index. For example (this is shown as list, since it will be list for JSON, but will be set for CBOR):: @@ -765,7 +765,7 @@ If the response to a query is an empty range, the ``NO CONTENT`` (204) response ``POST /storage/v1/mutable/:storage_index/:share_number/corrupt`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Advise the server the data read from the indicated share was corrupt. Just like the immutable version. From 4a573ede3461510d6f2aa09f78d2791dea8393b9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 15 Sep 2022 11:29:32 -0400 Subject: [PATCH 161/289] Download the actual data we need, instead of relying on bad reading-beyond-the-end semantics. --- src/allmydata/immutable/layout.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 6679fc94c..07b6b8b3b 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -17,8 +17,10 @@ from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE from allmydata.util import mathutil, observer, pipeline, log from allmydata.util.assertutil import precondition +from allmydata.util.deferredutil import async_to_deferred from allmydata.storage.server import si_b2a + class LayoutInvalid(Exception): """ There is something wrong with these bytes so they can't be interpreted as the kind of immutable file that I know how to download.""" @@ -311,8 +313,6 @@ class WriteBucketProxy_v2(WriteBucketProxy): @implementer(IStorageBucketReader) class ReadBucketProxy(object): - MAX_UEB_SIZE = 2000 # actual size is closer to 419, but varies by a few bytes - def __init__(self, rref, server, storage_index): self._rref = rref self._server = server @@ -389,10 +389,15 @@ class ReadBucketProxy(object): self._offsets[field] = offset return self._offsets - def _fetch_sharehashtree_and_ueb(self, offsets): + @async_to_deferred + async def _fetch_sharehashtree_and_ueb(self, offsets): + [ueb_length] = struct.unpack( + await self._read(offsets['share_hashes'], self._fieldsize), + self._fieldstruct + ) sharehashtree_size = offsets['uri_extension'] - offsets['share_hashes'] return self._read(offsets['share_hashes'], - self.MAX_UEB_SIZE+sharehashtree_size) + ueb_length + self._fieldsize +sharehashtree_size) def _parse_sharehashtree_and_ueb(self, data): sharehashtree_size = self._offsets['uri_extension'] - self._offsets['share_hashes'] From 444bc724c54a07ef4e0dddb53706e1e1d16091b3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 16 Sep 2022 10:38:29 -0400 Subject: [PATCH 162/289] A better approach to MAX_UEB_SIZE: just delete the code since it's not used in practice. --- src/allmydata/immutable/layout.py | 59 ++++++------------------------- 1 file changed, 10 insertions(+), 49 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 07b6b8b3b..d552d43c4 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -17,7 +17,6 @@ from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE from allmydata.util import mathutil, observer, pipeline, log from allmydata.util.assertutil import precondition -from allmydata.util.deferredutil import async_to_deferred from allmydata.storage.server import si_b2a @@ -340,11 +339,6 @@ class ReadBucketProxy(object): # TODO: for small shares, read the whole bucket in _start() d = self._fetch_header() d.addCallback(self._parse_offsets) - # XXX The following two callbacks implement a slightly faster/nicer - # way to get the ueb and sharehashtree, but it requires that the - # storage server be >= v1.3.0. - # d.addCallback(self._fetch_sharehashtree_and_ueb) - # d.addCallback(self._parse_sharehashtree_and_ueb) def _fail_waiters(f): self._ready.fire(f) def _notify_waiters(result): @@ -389,34 +383,6 @@ class ReadBucketProxy(object): self._offsets[field] = offset return self._offsets - @async_to_deferred - async def _fetch_sharehashtree_and_ueb(self, offsets): - [ueb_length] = struct.unpack( - await self._read(offsets['share_hashes'], self._fieldsize), - self._fieldstruct - ) - sharehashtree_size = offsets['uri_extension'] - offsets['share_hashes'] - return self._read(offsets['share_hashes'], - ueb_length + self._fieldsize +sharehashtree_size) - - def _parse_sharehashtree_and_ueb(self, data): - sharehashtree_size = self._offsets['uri_extension'] - self._offsets['share_hashes'] - if len(data) < sharehashtree_size: - raise LayoutInvalid("share hash tree truncated -- should have at least %d bytes -- not %d" % (sharehashtree_size, len(data))) - if sharehashtree_size % (2+HASH_SIZE) != 0: - raise LayoutInvalid("share hash tree malformed -- should have an even multiple of %d bytes -- not %d" % (2+HASH_SIZE, sharehashtree_size)) - self._share_hashes = [] - for i in range(0, sharehashtree_size, 2+HASH_SIZE): - hashnum = struct.unpack(">H", data[i:i+2])[0] - hashvalue = data[i+2:i+2+HASH_SIZE] - self._share_hashes.append( (hashnum, hashvalue) ) - - i = self._offsets['uri_extension']-self._offsets['share_hashes'] - if len(data) < i+self._fieldsize: - raise LayoutInvalid("not enough bytes to encode URI length -- should be at least %d bytes long, not %d " % (i+self._fieldsize, len(data),)) - length = struct.unpack(self._fieldstruct, data[i:i+self._fieldsize])[0] - self._ueb_data = data[i+self._fieldsize:i+self._fieldsize+length] - def _get_block_data(self, unused, blocknum, blocksize, thisblocksize): offset = self._offsets['data'] + blocknum * blocksize return self._read(offset, thisblocksize) @@ -459,20 +425,18 @@ class ReadBucketProxy(object): else: return defer.succeed([]) - def _get_share_hashes(self, unused=None): - if hasattr(self, '_share_hashes'): - return self._share_hashes - return self._get_share_hashes_the_old_way() - def get_share_hashes(self): d = self._start_if_needed() d.addCallback(self._get_share_hashes) return d - def _get_share_hashes_the_old_way(self): + def _get_share_hashes(self, _ignore): """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the end of the share, so we need to use the offset and - read just that much.""" + read just that much. + + HTTP-based storage protocol also doesn't like reading past the end. + """ offset = self._offsets['share_hashes'] size = self._offsets['uri_extension'] - offset if size % (2+HASH_SIZE) != 0: @@ -490,10 +454,13 @@ class ReadBucketProxy(object): d.addCallback(_unpack_share_hashes) return d - def _get_uri_extension_the_old_way(self, unused=None): + def _get_uri_extension(self, unused=None): """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the end of the share, so we need to fetch the UEB size - and then read just that much.""" + and then read just that much. + + HTTP-based storage protocol also doesn't like reading past the end. + """ offset = self._offsets['uri_extension'] d = self._read(offset, self._fieldsize) def _got_length(data): @@ -510,12 +477,6 @@ class ReadBucketProxy(object): d.addCallback(_got_length) return d - def _get_uri_extension(self, unused=None): - if hasattr(self, '_ueb_data'): - return self._ueb_data - else: - return self._get_uri_extension_the_old_way() - def get_uri_extension(self): d = self._start_if_needed() d.addCallback(self._get_uri_extension) From fb532a71ef4da28c91594e8d05695e267b747137 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 13 Sep 2022 22:43:09 -0600 Subject: [PATCH 163/289] own pid-file checks --- setup.py | 3 ++ src/allmydata/scripts/tahoe_run.py | 36 ++++++++++----- src/allmydata/util/pid.py | 72 ++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+), 12 deletions(-) create mode 100644 src/allmydata/util/pid.py diff --git a/setup.py b/setup.py index c3ee4eb90..bd16a61ce 100644 --- a/setup.py +++ b/setup.py @@ -138,6 +138,9 @@ install_requires = [ "treq", "cbor2", "pycddl", + + # for pid-file support + "psutil", ] setup_requires = [ diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 51be32ee3..21041f1ab 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -19,6 +19,7 @@ import os, sys from allmydata.scripts.common import BasedirOptions from twisted.scripts import twistd from twisted.python import usage +from twisted.python.filepath import FilePath from twisted.python.reflect import namedAny from twisted.internet.defer import maybeDeferred from twisted.application.service import Service @@ -27,6 +28,11 @@ from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin +from allmydata.util.pid import ( + check_pid_process, + cleanup_pidfile, + ProcessInTheWay, +) from allmydata.storage.crawler import ( MigratePickleFileError, ) @@ -35,28 +41,31 @@ from allmydata.node import ( PrivacyError, ) + def get_pidfile(basedir): """ Returns the path to the PID file. :param basedir: the node's base directory :returns: the path to the PID file """ - return os.path.join(basedir, u"twistd.pid") + return os.path.join(basedir, u"running.process") + def get_pid_from_pidfile(pidfile): """ Tries to read and return the PID stored in the node's PID file - (twistd.pid). + :param pidfile: try to read this PID file :returns: A numeric PID on success, ``None`` if PID file absent or inaccessible, ``-1`` if PID file invalid. """ try: with open(pidfile, "r") as f: - pid = f.read() + data = f.read().strip() except EnvironmentError: return None + pid, _ = data.split() try: pid = int(pid) except ValueError: @@ -64,6 +73,7 @@ def get_pid_from_pidfile(pidfile): return pid + def identify_node_type(basedir): """ :return unicode: None or one of: 'client' or 'introducer'. @@ -227,10 +237,8 @@ def run(config, runApp=twistd.runApp): print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - twistd_args = ["--nodaemon", "--rundir", basedir] - if sys.platform != "win32": - pidfile = get_pidfile(basedir) - twistd_args.extend(["--pidfile", pidfile]) + # we turn off Twisted's pid-file to use our own + twistd_args = ["--pidfile", None, "--nodaemon", "--rundir", basedir] twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -246,12 +254,16 @@ def run(config, runApp=twistd.runApp): return 1 twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} - # handle invalid PID file (twistd might not start otherwise) - if sys.platform != "win32" and get_pid_from_pidfile(pidfile) == -1: - print("found invalid PID file in %s - deleting it" % basedir, file=err) - os.remove(pidfile) + # before we try to run, check against our pidfile -- this will + # raise an exception if there appears to be a running process "in + # the way" + pidfile = FilePath(get_pidfile(config['basedir'])) + try: + check_pid_process(pidfile) + except ProcessInTheWay as e: + print("ERROR: {}".format(e)) + return 1 # We always pass --nodaemon so twistd.runApp does not daemonize. - print("running node in %s" % (quoted_basedir,), file=out) runApp(twistd_config) return 0 diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py new file mode 100644 index 000000000..21e30aa87 --- /dev/null +++ b/src/allmydata/util/pid.py @@ -0,0 +1,72 @@ +import os +import psutil + + +class ProcessInTheWay(Exception): + """ + our pidfile points at a running process + """ + + +def check_pid_process(pidfile, find_process=None): + """ + If another instance appears to be running already, raise an + exception. Otherwise, write our PID + start time to the pidfile + and arrange to delete it upon exit. + + :param FilePath pidfile: the file to read/write our PID from. + + :param Callable find_process: None, or a custom way to get a + Process objet (usually for tests) + + :raises ProcessInTheWay: if a running process exists at our PID + """ + find_process = psutil.Process if find_process is None else find_process + # check if we have another instance running already + if pidfile.exists(): + with pidfile.open("r") as f: + content = f.read().decode("utf8").strip() + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + try: + # if any other process is running at that PID, let the + # user decide if this is another magic-older + # instance. Automated programs may use the start-time to + # help decide this (if the PID is merely recycled, the + # start-time won't match). + proc = find_process(pid) + raise ProcessInTheWay( + "A process is already running as PID {}".format(pid) + ) + except psutil.NoSuchProcess: + print( + "'{pidpath}' refers to {pid} that isn't running".format( + pidpath=pidfile.path, + pid=pid, + ) + ) + # nothing is running at that PID so it must be a stale file + pidfile.remove() + + # write our PID + start-time to the pid-file + pid = os.getpid() + starttime = find_process(pid).create_time() + with pidfile.open("w") as f: + f.write("{} {}\n".format(pid, starttime).encode("utf8")) + + +def cleanup_pidfile(pidfile): + """ + Safely remove the given pidfile + """ + + try: + pidfile.remove() + except Exception as e: + print( + "Couldn't remove '{pidfile}': {err}.".format( + pidfile=pidfile.path, + err=e, + ) + ) From 3bfb60c6f426cadc25bb201e6e59165cedd2b490 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 19:57:01 -0600 Subject: [PATCH 164/289] back to context-manager, simplify --- src/allmydata/scripts/tahoe_run.py | 15 +++++++++------ src/allmydata/test/cli/test_run.py | 20 +++++++++++--------- src/allmydata/util/pid.py | 29 +++++++++++++++++++++-------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 21041f1ab..07f5bf72c 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -30,8 +30,8 @@ from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin from allmydata.util.pid import ( check_pid_process, - cleanup_pidfile, ProcessInTheWay, + InvalidPidFile, ) from allmydata.storage.crawler import ( MigratePickleFileError, @@ -237,8 +237,13 @@ def run(config, runApp=twistd.runApp): print("%s is not a recognizable node directory" % quoted_basedir, file=err) return 1 - # we turn off Twisted's pid-file to use our own - twistd_args = ["--pidfile", None, "--nodaemon", "--rundir", basedir] + twistd_args = [ + # turn off Twisted's pid-file to use our own + "--pidfile", None, + # ensure twistd machinery does not daemonize. + "--nodaemon", + "--rundir", basedir, + ] twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin @@ -254,9 +259,7 @@ def run(config, runApp=twistd.runApp): return 1 twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} - # before we try to run, check against our pidfile -- this will - # raise an exception if there appears to be a running process "in - # the way" + # our own pid-style file contains PID and process creation time pidfile = FilePath(get_pidfile(config['basedir'])) try: check_pid_process(pidfile) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 28613e8c1..db01eb440 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -159,7 +159,7 @@ class RunTests(SyncTestCase): """ basedir = FilePath(self.mktemp()).asTextMode() basedir.makedirs() - basedir.child(u"twistd.pid").setContent(b"foo") + basedir.child(u"running.process").setContent(b"foo") basedir.child(u"tahoe-client.tac").setContent(b"") config = RunOptions() @@ -168,17 +168,19 @@ class RunTests(SyncTestCase): config['basedir'] = basedir.path config.twistd_args = [] - runs = [] - result_code = run(config, runApp=runs.append) + class DummyRunner: + runs = [] + _exitSignal = None + + def run(self): + self.runs.append(True) + + result_code = run(config, runner=DummyRunner()) self.assertThat( config.stderr.getvalue(), Contains("found invalid PID file in"), ) self.assertThat( - runs, - HasLength(1), - ) - self.assertThat( - result_code, - Equals(0), + DummyRunner.runs, + Equals([]) ) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 21e30aa87..3b488a2c2 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,5 +1,8 @@ import os import psutil +from contextlib import ( + contextmanager, +) class ProcessInTheWay(Exception): @@ -8,6 +11,13 @@ class ProcessInTheWay(Exception): """ +class InvalidPidFile(Exception): + """ + our pidfile isn't well-formed + """ + + +@contextmanager def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -26,9 +36,16 @@ def check_pid_process(pidfile, find_process=None): if pidfile.exists(): with pidfile.open("r") as f: content = f.read().decode("utf8").strip() - pid, starttime = content.split() - pid = int(pid) - starttime = float(starttime) + try: + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + except ValueError: + raise InvalidPidFile( + "found invalid PID file in {}".format( + pidfile + ) + ) try: # if any other process is running at that PID, let the # user decide if this is another magic-older @@ -55,11 +72,7 @@ def check_pid_process(pidfile, find_process=None): with pidfile.open("w") as f: f.write("{} {}\n".format(pid, starttime).encode("utf8")) - -def cleanup_pidfile(pidfile): - """ - Safely remove the given pidfile - """ + yield # setup completed, await cleanup try: pidfile.remove() From cad162bb8fb2d961c74f457be6e4495b00f0aeed Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 19:59:18 -0600 Subject: [PATCH 165/289] should have pid-file on windows too, now --- src/allmydata/test/cli/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index db01eb440..902e4011a 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -151,7 +151,7 @@ class RunTests(SyncTestCase): """ Tests for ``run``. """ - @skipIf(platform.isWindows(), "There are no PID files on Windows.") + def test_non_numeric_pid(self): """ If the pidfile exists but does not contain a numeric value, a complaint to From 0e0ebf6687280d0be5ae6a536a4f9d48958d03b7 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 20:06:32 -0600 Subject: [PATCH 166/289] more testing --- src/allmydata/test/cli/test_run.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 902e4011a..ecc81fe3f 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -20,6 +20,9 @@ from testtools import ( skipIf, ) +from hypothesis.strategies import text +from hypothesis import given + from testtools.matchers import ( Contains, Equals, @@ -44,6 +47,10 @@ from ...scripts.tahoe_run import ( RunOptions, run, ) +from ...util.pid import ( + check_pid_process, + InvalidPidFile, +) from ...scripts.runner import ( parse_options @@ -180,7 +187,18 @@ class RunTests(SyncTestCase): config.stderr.getvalue(), Contains("found invalid PID file in"), ) + # because the pidfile is invalid we shouldn't get to the + # .run() call itself. self.assertThat( DummyRunner.runs, Equals([]) ) + + @given(text()) + def test_pidfile_contents(self, content): + pidfile = FilePath("pidfile") + pidfile.setContent(content.encode("utf8")) + + with self.assertRaises(InvalidPidFile): + with check_pid_process(pidfile): + pass From e6adfc7726cc3e081d18b712e573ef265e49c3ca Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 20:22:07 -0600 Subject: [PATCH 167/289] news --- newsfragments/3926.incompat | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 newsfragments/3926.incompat diff --git a/newsfragments/3926.incompat b/newsfragments/3926.incompat new file mode 100644 index 000000000..3f58b4ba8 --- /dev/null +++ b/newsfragments/3926.incompat @@ -0,0 +1,10 @@ +Record both the PID and the process creation-time + +A new kind of pidfile in `running.process` records both +the PID and the creation-time of the process. This facilitates +automatic discovery of a "stale" pidfile that points to a +currently-running process. If the recorded creation-time matches +the creation-time of the running process, then it is a still-running +`tahoe run` proecss. Otherwise, the file is stale. + +The `twistd.pid` file is no longer present. \ No newline at end of file From 6048d1d9a99e5f88cd423a9524bede823277709f Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 21:13:30 -0600 Subject: [PATCH 168/289] in case hypothesis finds the magic --- src/allmydata/test/cli/test_run.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index ecc81fe3f..7bf87eea9 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -12,6 +12,7 @@ from future.utils import PY2 if PY2: from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +import re from six.moves import ( StringIO, ) @@ -21,7 +22,7 @@ from testtools import ( ) from hypothesis.strategies import text -from hypothesis import given +from hypothesis import given, assume from testtools.matchers import ( Contains, @@ -194,8 +195,11 @@ class RunTests(SyncTestCase): Equals([]) ) + good_file_content_re = re.compile(r"\w[0-9]*\w[0-9]*\w") + @given(text()) def test_pidfile_contents(self, content): + assume(not self.good_file_content_re.match(content)) pidfile = FilePath("pidfile") pidfile.setContent(content.encode("utf8")) From 642b604753dd9b9af2c740e04e65e58bbae00299 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 21:51:56 -0600 Subject: [PATCH 169/289] use stdin-closing for pidfile cleanup too --- src/allmydata/scripts/tahoe_run.py | 1 + src/allmydata/test/cli/test_run.py | 12 +++--------- src/allmydata/util/pid.py | 17 +++++++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 07f5bf72c..20d5c2bf1 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -30,6 +30,7 @@ from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin from allmydata.util.pid import ( check_pid_process, + cleanup_pidfile, ProcessInTheWay, InvalidPidFile, ) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 7bf87eea9..71085fddd 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -176,14 +176,8 @@ class RunTests(SyncTestCase): config['basedir'] = basedir.path config.twistd_args = [] - class DummyRunner: - runs = [] - _exitSignal = None - - def run(self): - self.runs.append(True) - - result_code = run(config, runner=DummyRunner()) + runs = [] + result_code = run(config, runApp=runs.append) self.assertThat( config.stderr.getvalue(), Contains("found invalid PID file in"), @@ -191,7 +185,7 @@ class RunTests(SyncTestCase): # because the pidfile is invalid we shouldn't get to the # .run() call itself. self.assertThat( - DummyRunner.runs, + runs, Equals([]) ) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 3b488a2c2..3ab955cb3 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,8 +1,5 @@ import os import psutil -from contextlib import ( - contextmanager, -) class ProcessInTheWay(Exception): @@ -17,7 +14,12 @@ class InvalidPidFile(Exception): """ -@contextmanager +class CannotRemovePidFile(Exception): + """ + something went wrong removing the pidfile + """ + + def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -72,12 +74,15 @@ def check_pid_process(pidfile, find_process=None): with pidfile.open("w") as f: f.write("{} {}\n".format(pid, starttime).encode("utf8")) - yield # setup completed, await cleanup +def cleanup_pidfile(pidfile): + """ + Safely clean up a PID-file + """ try: pidfile.remove() except Exception as e: - print( + raise CannotRemovePidFile( "Couldn't remove '{pidfile}': {err}.".format( pidfile=pidfile.path, err=e, From 82c72ddede1dbbe97365877186af27928a996c0b Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 21:58:20 -0600 Subject: [PATCH 170/289] cleanup --- src/allmydata/test/cli/test_run.py | 14 ++------------ src/allmydata/util/pid.py | 4 ++-- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 71085fddd..ae869e475 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -17,22 +17,14 @@ from six.moves import ( StringIO, ) -from testtools import ( - skipIf, -) - from hypothesis.strategies import text from hypothesis import given, assume from testtools.matchers import ( Contains, Equals, - HasLength, ) -from twisted.python.runtime import ( - platform, -) from twisted.python.filepath import ( FilePath, ) @@ -184,10 +176,8 @@ class RunTests(SyncTestCase): ) # because the pidfile is invalid we shouldn't get to the # .run() call itself. - self.assertThat( - runs, - Equals([]) - ) + self.assertThat(runs, Equals([])) + self.assertThat(result_code, Equals(1)) good_file_content_re = re.compile(r"\w[0-9]*\w[0-9]*\w") diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 3ab955cb3..ff8129bbc 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -50,11 +50,11 @@ def check_pid_process(pidfile, find_process=None): ) try: # if any other process is running at that PID, let the - # user decide if this is another magic-older + # user decide if this is another legitimate # instance. Automated programs may use the start-time to # help decide this (if the PID is merely recycled, the # start-time won't match). - proc = find_process(pid) + find_process(pid) raise ProcessInTheWay( "A process is already running as PID {}".format(pid) ) From 228bbbc2fe791b83af0d495df44882a63456b59f Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 22:39:59 -0600 Subject: [PATCH 171/289] new pid-file --- src/allmydata/test/cli_node_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli_node_api.py b/src/allmydata/test/cli_node_api.py index 410796be2..c324d5565 100644 --- a/src/allmydata/test/cli_node_api.py +++ b/src/allmydata/test/cli_node_api.py @@ -134,7 +134,7 @@ class CLINodeAPI(object): @property def twistd_pid_file(self): - return self.basedir.child(u"twistd.pid") + return self.basedir.child(u"running.process") @property def node_url_file(self): From 114d5e1ed8582fa130953227eced0528862ca381 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 15 Sep 2022 23:08:46 -0600 Subject: [PATCH 172/289] pidfile on windows now --- src/allmydata/scripts/tahoe_run.py | 6 +++-- src/allmydata/test/test_runner.py | 36 ++++++++++++------------------ 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 20d5c2bf1..72b8e3eca 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -239,12 +239,14 @@ def run(config, runApp=twistd.runApp): return 1 twistd_args = [ - # turn off Twisted's pid-file to use our own - "--pidfile", None, # ensure twistd machinery does not daemonize. "--nodaemon", "--rundir", basedir, ] + if sys.platform != "win32": + # turn off Twisted's pid-file to use our own -- but only on + # windows, because twistd doesn't know about pidfiles there + twistd_args.extend(["--pidfile", None]) twistd_args.extend(config.twistd_args) twistd_args.append("DaemonizeTahoeNode") # point at our DaemonizeTahoeNodePlugin diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 3eb6b8a34..9b6357f46 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -418,9 +418,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): tahoe.active() - # We don't keep track of PIDs in files on Windows. - if not platform.isWindows(): - self.assertTrue(tahoe.twistd_pid_file.exists()) + self.assertTrue(tahoe.twistd_pid_file.exists()) self.assertTrue(tahoe.node_url_file.exists()) # rm this so we can detect when the second incarnation is ready @@ -493,9 +491,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): # change on restart storage_furl = fileutil.read(tahoe.storage_furl_file.path) - # We don't keep track of PIDs in files on Windows. - if not platform.isWindows(): - self.assertTrue(tahoe.twistd_pid_file.exists()) + self.assertTrue(tahoe.twistd_pid_file.exists()) # rm this so we can detect when the second incarnation is ready tahoe.node_url_file.remove() @@ -513,21 +509,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): fileutil.read(tahoe.storage_furl_file.path), ) - if not platform.isWindows(): - self.assertTrue( - tahoe.twistd_pid_file.exists(), - "PID file ({}) didn't exist when we expected it to. " - "These exist: {}".format( - tahoe.twistd_pid_file, - tahoe.twistd_pid_file.parent().listdir(), - ), - ) + self.assertTrue( + tahoe.twistd_pid_file.exists(), + "PID file ({}) didn't exist when we expected it to. " + "These exist: {}".format( + tahoe.twistd_pid_file, + tahoe.twistd_pid_file.parent().listdir(), + ), + ) yield tahoe.stop_and_wait() - if not platform.isWindows(): - # twistd.pid should be gone by now. - self.assertFalse(tahoe.twistd_pid_file.exists()) - + # twistd.pid should be gone by now. + self.assertFalse(tahoe.twistd_pid_file.exists()) def _remove(self, res, file): fileutil.remove(file) @@ -610,9 +603,8 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): ), ) - if not platform.isWindows(): - # It should not be running. - self.assertFalse(tahoe.twistd_pid_file.exists()) + # It should not be running. + self.assertFalse(tahoe.twistd_pid_file.exists()) # Wait for the operation to *complete*. If we got this far it's # because we got the expected message so we can expect the "tahoe ..." From aef2e96139fc0afc610736b181e218dce2aa9b79 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 17 Sep 2022 16:28:25 -0600 Subject: [PATCH 173/289] refactor: dispatch with our reactor, pass to tahoe_run --- src/allmydata/scripts/runner.py | 12 ++++-------- src/allmydata/scripts/tahoe_run.py | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index a0d8a752b..756c26f2c 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -47,11 +47,6 @@ if _default_nodedir: NODEDIR_HELP += " [default for most commands: " + quote_local_unicode_path(_default_nodedir) + "]" -# XXX all this 'dispatch' stuff needs to be unified + fixed up -_control_node_dispatch = { - "run": tahoe_run.run, -} - process_control_commands = [ ("run", None, tahoe_run.RunOptions, "run a node without daemonizing"), ] # type: SubCommands @@ -195,6 +190,7 @@ def parse_or_exit(config, argv, stdout, stderr): return config def dispatch(config, + reactor, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr): command = config.subCommand so = config.subOptions @@ -206,8 +202,8 @@ def dispatch(config, if command in create_dispatch: f = create_dispatch[command] - elif command in _control_node_dispatch: - f = _control_node_dispatch[command] + elif command == "run": + f = lambda config: tahoe_run.run(reactor, config) elif command in debug.dispatch: f = debug.dispatch[command] elif command in admin.dispatch: @@ -361,7 +357,7 @@ def _run_with_reactor(reactor, config, argv, stdout, stderr): stderr, ) d.addCallback(_maybe_enable_eliot_logging, reactor) - d.addCallback(dispatch, stdout=stdout, stderr=stderr) + d.addCallback(dispatch, reactor, stdout=stdout, stderr=stderr) def _show_exception(f): # when task.react() notices a non-SystemExit exception, it does # log.err() with the failure and then exits with rc=1. We want this diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 72b8e3eca..dd4561a4b 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -217,7 +217,7 @@ class DaemonizeTahoeNodePlugin(object): return DaemonizeTheRealService(self.nodetype, self.basedir, so) -def run(config, runApp=twistd.runApp): +def run(reactor, config, runApp=twistd.runApp): """ Runs a Tahoe-LAFS node in the foreground. From 8b2cb79070edabd20fb9bdbb41de51458788e50a Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 17 Sep 2022 16:29:03 -0600 Subject: [PATCH 174/289] cleanup via reactor --- src/allmydata/scripts/tahoe_run.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index dd4561a4b..a5b833233 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -269,6 +269,11 @@ def run(reactor, config, runApp=twistd.runApp): except ProcessInTheWay as e: print("ERROR: {}".format(e)) return 1 + else: + reactor.addSystemEventTrigger( + "during", "shutdown", + lambda: cleanup_pidfile(pidfile) + ) # We always pass --nodaemon so twistd.runApp does not daemonize. runApp(twistd_config) From 254a994eb53035b70a653b47b2951d6159634a23 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 17 Sep 2022 16:41:17 -0600 Subject: [PATCH 175/289] flake8 --- src/allmydata/scripts/tahoe_run.py | 2 +- src/allmydata/test/test_runner.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index a5b833233..7722fef51 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -266,7 +266,7 @@ def run(reactor, config, runApp=twistd.runApp): pidfile = FilePath(get_pidfile(config['basedir'])) try: check_pid_process(pidfile) - except ProcessInTheWay as e: + except (ProcessInTheWay, InvalidPidFile) as e: print("ERROR: {}".format(e)) return 1 else: diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 9b6357f46..14d0dfb7f 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -47,9 +47,6 @@ from twisted.internet.defer import ( DeferredList, ) from twisted.python.filepath import FilePath -from twisted.python.runtime import ( - platform, -) from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv from allmydata.test import common_util From fe80126e3fcffe56c171188c7cd5847f19bf6f7b Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 18 Sep 2022 22:39:25 -0600 Subject: [PATCH 176/289] fixups --- src/allmydata/scripts/tahoe_run.py | 2 +- src/allmydata/test/cli/test_run.py | 4 +++- src/allmydata/test/common_util.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 7722fef51..6dfa726a3 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -267,7 +267,7 @@ def run(reactor, config, runApp=twistd.runApp): try: check_pid_process(pidfile) except (ProcessInTheWay, InvalidPidFile) as e: - print("ERROR: {}".format(e)) + print("ERROR: {}".format(e), file=err) return 1 else: reactor.addSystemEventTrigger( diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index ae869e475..6358b70dd 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -168,8 +168,10 @@ class RunTests(SyncTestCase): config['basedir'] = basedir.path config.twistd_args = [] + from twisted.internet import reactor + runs = [] - result_code = run(config, runApp=runs.append) + result_code = run(reactor, config, runApp=runs.append) self.assertThat( config.stderr.getvalue(), Contains("found invalid PID file in"), diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index e63c3eef8..b6d352ab1 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -145,6 +145,7 @@ def run_cli_native(verb, *args, **kwargs): ) d.addCallback( runner.dispatch, + reactor, stdin=stdin, stdout=stdout, stderr=stderr, From ef0b2aca1769dbdf11c5eb50b66c186d2ee9e22f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 19 Sep 2022 10:12:11 -0400 Subject: [PATCH 177/289] Adjust NURL spec to new decisions. --- docs/specifications/url.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 31fb05fad..1ce3b2a7f 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -47,27 +47,27 @@ This can be considered to expand to "**N**\ ew URLs" or "Authe\ **N**\ ticating The anticipated use for a **NURL** will still be to establish a TLS connection to a peer. The protocol run over that TLS connection could be Foolscap though it is more likely to be an HTTP-based protocol (such as GBS). +Unlike fURLs, only a single net-loc is included, for consistency with other forms of URLs. +As a result, multiple NURLs may be available for a single server. + Syntax ------ The EBNF for a NURL is as follows:: - nurl = scheme, hash, "@", net-loc-list, "/", swiss-number, [ version1 ] - - scheme = "pb://" + nurl = tcp-nurl | tor-nurl | i2p-nurl + tcp-nurl = "pb://", hash, "@", tcp-loc, "/", swiss-number, [ version1 ] + tor-nurl = "pb+tor://", hash, "@", tcp-loc, "/", swiss-number, [ version1 ] + i2p-nurl = "pb+i2p://", hash, "@", i2p-loc, "/", swiss-number, [ version1 ] hash = unreserved - net-loc-list = net-loc, [ { ",", net-loc } ] - net-loc = tcp-loc | tor-loc | i2p-loc - - tcp-loc = [ "tcp:" ], hostname, [ ":" port ] - tor-loc = "tor:", hostname, [ ":" port ] - i2p-loc = "i2p:", i2p-addr, [ ":" port ] - - i2p-addr = { unreserved }, ".i2p" + tcp-loc = hostname, [ ":" port ] hostname = domain | IPv4address | IPv6address + i2p-loc = i2p-addr, [ ":" port ] + i2p-addr = { unreserved }, ".i2p" + swiss-number = segment version1 = "#v=1" From 4b2725df006ae172f267072a8bcb222b6be6aad9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 20 Sep 2022 10:09:43 -0400 Subject: [PATCH 178/289] Try to prevent leaking timeouts. --- src/allmydata/protocol_switch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 89570436c..a17f3055c 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -151,6 +151,10 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): 30, self.transport.abortConnection ) + def connectionLost(self, reason): + if self._timeout.active(): + self._timeout.cancel() + def dataReceived(self, data: bytes) -> None: """Handle incoming data. From 81c8e1c57b8b926ebb3a396f653d7149bd4f6577 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:24:02 -0600 Subject: [PATCH 179/289] windows is special --- src/allmydata/test/test_runner.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 14d0dfb7f..5d8143558 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -42,6 +42,7 @@ from twisted.trial import unittest from twisted.internet import reactor from twisted.python import usage +from twisted.python.runtime import platform from twisted.internet.defer import ( inlineCallbacks, DeferredList, @@ -516,8 +517,12 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): ) yield tahoe.stop_and_wait() - # twistd.pid should be gone by now. - self.assertFalse(tahoe.twistd_pid_file.exists()) + # twistd.pid should be gone by now -- except on Windows, where + # killing a subprocess immediately exits with no chance for + # any shutdown code (that is, no Twisted shutdown hooks can + # run). + if not platform.isWindows(): + self.assertFalse(tahoe.twistd_pid_file.exists()) def _remove(self, res, file): fileutil.remove(file) From 6db1476dacc99c00a12d88b1c6af6a8aa76f3404 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:44:21 -0600 Subject: [PATCH 180/289] comment typo --- src/allmydata/scripts/tahoe_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 6dfa726a3..721ced376 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -244,7 +244,7 @@ def run(reactor, config, runApp=twistd.runApp): "--rundir", basedir, ] if sys.platform != "win32": - # turn off Twisted's pid-file to use our own -- but only on + # turn off Twisted's pid-file to use our own -- but not on # windows, because twistd doesn't know about pidfiles there twistd_args.extend(["--pidfile", None]) twistd_args.extend(config.twistd_args) From 0eeb11c9cd45598fbe2e5bdccb4f9cf50fe222f3 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:44:51 -0600 Subject: [PATCH 181/289] after shutdown --- src/allmydata/scripts/tahoe_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 721ced376..40c4a6612 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -271,7 +271,7 @@ def run(reactor, config, runApp=twistd.runApp): return 1 else: reactor.addSystemEventTrigger( - "during", "shutdown", + "after", "shutdown", lambda: cleanup_pidfile(pidfile) ) From 77bc83d341794afbd0c6884fb5e0e914dbe90632 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:45:19 -0600 Subject: [PATCH 182/289] incorrectly removed --- src/allmydata/scripts/tahoe_run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 40c4a6612..eb4bb0b66 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -276,5 +276,6 @@ def run(reactor, config, runApp=twistd.runApp): ) # We always pass --nodaemon so twistd.runApp does not daemonize. + print("running node in %s" % (quoted_basedir,), file=out) runApp(twistd_config) return 0 From 1f29cc9c29e42a472ce893259f5bdbf2a31c00e0 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 20 Sep 2022 14:50:46 -0600 Subject: [PATCH 183/289] windows special --- src/allmydata/test/test_runner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 5d8143558..cf6e9f3b5 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -605,8 +605,10 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): ), ) - # It should not be running. - self.assertFalse(tahoe.twistd_pid_file.exists()) + # It should not be running (but windows shutdown can't run + # code so the PID file still exists there). + if not platform.isWindows(): + self.assertFalse(tahoe.twistd_pid_file.exists()) # Wait for the operation to *complete*. If we got this far it's # because we got the expected message so we can expect the "tahoe ..." From 5973196931d2143f68a34d9b01857339582ec5c0 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:00:27 -0600 Subject: [PATCH 184/289] refactor: use filelock and test it --- setup.py | 1 + src/allmydata/test/test_runner.py | 47 ++++++++++++ src/allmydata/util/pid.py | 117 ++++++++++++++++++------------ 3 files changed, 119 insertions(+), 46 deletions(-) diff --git a/setup.py b/setup.py index bd16a61ce..d99831347 100644 --- a/setup.py +++ b/setup.py @@ -141,6 +141,7 @@ install_requires = [ # for pid-file support "psutil", + "filelock", ] setup_requires = [ diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index cf6e9f3b5..5a8311649 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -50,6 +50,11 @@ from twisted.internet.defer import ( from twisted.python.filepath import FilePath from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv +from allmydata.util.pid import ( + check_pid_process, + _pidfile_to_lockpath, + ProcessInTheWay, +) from allmydata.test import common_util import allmydata from allmydata.scripts.runner import ( @@ -617,3 +622,45 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): # What's left is a perfect indicator that the process has exited and # we won't get blamed for leaving the reactor dirty. yield client_running + + +class PidFileLocking(SyncTestCase): + """ + Direct tests for allmydata.util.pid functions + """ + + def test_locking(self): + """ + Fail to create a pidfile if another process has the lock already. + """ + # this can't just be "our" process because the locking library + # allows the same process to acquire a lock multiple times. + pidfile = FilePath("foo") + lockfile = _pidfile_to_lockpath(pidfile) + + with open("code.py", "w") as f: + f.write( + "\n".join([ + "import filelock, time", + "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), + " print('.', flush=True)", + " time.sleep(5)", + ]) + ) + proc = Popen( + [sys.executable, "code.py"], + stdout=PIPE, + stderr=PIPE, + start_new_session=True, + ) + # make sure our subprocess has had time to acquire the lock + # for sure (from the "." it prints) + self.assertThat( + proc.stdout.read(1), + Equals(b".") + ) + + # we should not be able to acuire this corresponding lock as well + with self.assertRaises(ProcessInTheWay): + check_pid_process(pidfile) + proc.terminate() diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index ff8129bbc..e256615d6 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,6 +1,10 @@ import os import psutil +# the docs are a little misleading, but this is either WindowsFileLock +# or UnixFileLock depending upon the platform we're currently on +from filelock import FileLock, Timeout + class ProcessInTheWay(Exception): """ @@ -20,6 +24,14 @@ class CannotRemovePidFile(Exception): """ +def _pidfile_to_lockpath(pidfile): + """ + internal helper. + :returns FilePath: a path to use for file-locking the given pidfile + """ + return pidfile.sibling("{}.lock".format(pidfile.basename())) + + def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -34,57 +46,70 @@ def check_pid_process(pidfile, find_process=None): :raises ProcessInTheWay: if a running process exists at our PID """ find_process = psutil.Process if find_process is None else find_process - # check if we have another instance running already - if pidfile.exists(): - with pidfile.open("r") as f: - content = f.read().decode("utf8").strip() - try: - pid, starttime = content.split() - pid = int(pid) - starttime = float(starttime) - except ValueError: - raise InvalidPidFile( - "found invalid PID file in {}".format( - pidfile - ) - ) - try: - # if any other process is running at that PID, let the - # user decide if this is another legitimate - # instance. Automated programs may use the start-time to - # help decide this (if the PID is merely recycled, the - # start-time won't match). - find_process(pid) - raise ProcessInTheWay( - "A process is already running as PID {}".format(pid) - ) - except psutil.NoSuchProcess: - print( - "'{pidpath}' refers to {pid} that isn't running".format( - pidpath=pidfile.path, - pid=pid, - ) - ) - # nothing is running at that PID so it must be a stale file - pidfile.remove() + lock_path = _pidfile_to_lockpath(pidfile) - # write our PID + start-time to the pid-file - pid = os.getpid() - starttime = find_process(pid).create_time() - with pidfile.open("w") as f: - f.write("{} {}\n".format(pid, starttime).encode("utf8")) + try: + # a short timeout is fine, this lock should only be active + # while someone is reading or deleting the pidfile .. and + # facilitates testing the locking itself. + with FileLock(lock_path.path, timeout=2): + # check if we have another instance running already + if pidfile.exists(): + with pidfile.open("r") as f: + content = f.read().decode("utf8").strip() + try: + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + except ValueError: + raise InvalidPidFile( + "found invalid PID file in {}".format( + pidfile + ) + ) + try: + # if any other process is running at that PID, let the + # user decide if this is another legitimate + # instance. Automated programs may use the start-time to + # help decide this (if the PID is merely recycled, the + # start-time won't match). + find_process(pid) + raise ProcessInTheWay( + "A process is already running as PID {}".format(pid) + ) + except psutil.NoSuchProcess: + print( + "'{pidpath}' refers to {pid} that isn't running".format( + pidpath=pidfile.path, + pid=pid, + ) + ) + # nothing is running at that PID so it must be a stale file + pidfile.remove() + + # write our PID + start-time to the pid-file + pid = os.getpid() + starttime = find_process(pid).create_time() + with pidfile.open("w") as f: + f.write("{} {}\n".format(pid, starttime).encode("utf8")) + except Timeout: + raise ProcessInTheWay( + "Another process is still locking {}".format(pidfile.path) + ) def cleanup_pidfile(pidfile): """ Safely clean up a PID-file """ - try: - pidfile.remove() - except Exception as e: - raise CannotRemovePidFile( - "Couldn't remove '{pidfile}': {err}.".format( - pidfile=pidfile.path, - err=e, + lock_path = _pidfile_to_lockpath(pidfile) + with FileLock(lock_path.path): + try: + pidfile.remove() + except Exception as e: + raise CannotRemovePidFile( + "Couldn't remove '{pidfile}': {err}.".format( + pidfile=pidfile.path, + err=e, + ) ) - ) From ea39e4ca6902daad125596f5e1e2b81989e9cb6b Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:01:28 -0600 Subject: [PATCH 185/289] docstring --- src/allmydata/test/cli/test_run.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 6358b70dd..551164d3c 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -185,6 +185,9 @@ class RunTests(SyncTestCase): @given(text()) def test_pidfile_contents(self, content): + """ + invalid contents for a pidfile raise errors + """ assume(not self.good_file_content_re.match(content)) pidfile = FilePath("pidfile") pidfile.setContent(content.encode("utf8")) From 56775dde192c90b48fa85cfcb4a2651f5b264791 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:05:30 -0600 Subject: [PATCH 186/289] refactor: parsing in a function --- src/allmydata/scripts/tahoe_run.py | 6 +++--- src/allmydata/util/pid.py | 34 +++++++++++++++++++----------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index eb4bb0b66..4d17492d4 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -29,6 +29,7 @@ from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_pat from allmydata.util.configutil import UnknownConfigError from allmydata.util.deferredutil import HookMixin from allmydata.util.pid import ( + parse_pidfile, check_pid_process, cleanup_pidfile, ProcessInTheWay, @@ -66,10 +67,9 @@ def get_pid_from_pidfile(pidfile): except EnvironmentError: return None - pid, _ = data.split() try: - pid = int(pid) - except ValueError: + pid, _ = parse_pidfile(pidfile) + except InvalidPidFile: return -1 return pid diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index e256615d6..1cb2cc45a 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -32,6 +32,27 @@ def _pidfile_to_lockpath(pidfile): return pidfile.sibling("{}.lock".format(pidfile.basename())) +def parse_pidfile(pidfile): + """ + :param FilePath pidfile: + :returns tuple: 2-tuple of pid, creation-time as int, float + :raises InvalidPidFile: on error + """ + with pidfile.open("r") as f: + content = f.read().decode("utf8").strip() + try: + pid, starttime = content.split() + pid = int(pid) + starttime = float(starttime) + except ValueError: + raise InvalidPidFile( + "found invalid PID file in {}".format( + pidfile + ) + ) + return pid, startime + + def check_pid_process(pidfile, find_process=None): """ If another instance appears to be running already, raise an @@ -55,18 +76,7 @@ def check_pid_process(pidfile, find_process=None): with FileLock(lock_path.path, timeout=2): # check if we have another instance running already if pidfile.exists(): - with pidfile.open("r") as f: - content = f.read().decode("utf8").strip() - try: - pid, starttime = content.split() - pid = int(pid) - starttime = float(starttime) - except ValueError: - raise InvalidPidFile( - "found invalid PID file in {}".format( - pidfile - ) - ) + pid, starttime = parse_pidfile(pidfile) try: # if any other process is running at that PID, let the # user decide if this is another legitimate From 390c8c52da3f9f0ea21ad789caf7c8f6ae9bbc74 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:23:30 -0600 Subject: [PATCH 187/289] formatting + typo --- newsfragments/3926.incompat | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/newsfragments/3926.incompat b/newsfragments/3926.incompat index 3f58b4ba8..674ad289c 100644 --- a/newsfragments/3926.incompat +++ b/newsfragments/3926.incompat @@ -1,10 +1,10 @@ -Record both the PID and the process creation-time +Record both the PID and the process creation-time: -A new kind of pidfile in `running.process` records both +a new kind of pidfile in `running.process` records both the PID and the creation-time of the process. This facilitates automatic discovery of a "stale" pidfile that points to a currently-running process. If the recorded creation-time matches the creation-time of the running process, then it is a still-running -`tahoe run` proecss. Otherwise, the file is stale. +`tahoe run` process. Otherwise, the file is stale. The `twistd.pid` file is no longer present. \ No newline at end of file From e111694b3e33e54db974acbd057d74380c6de4ce Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:28:09 -0600 Subject: [PATCH 188/289] get rid of find_process= --- src/allmydata/util/pid.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 1cb2cc45a..d681d819e 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -53,7 +53,7 @@ def parse_pidfile(pidfile): return pid, startime -def check_pid_process(pidfile, find_process=None): +def check_pid_process(pidfile): """ If another instance appears to be running already, raise an exception. Otherwise, write our PID + start time to the pidfile @@ -61,12 +61,8 @@ def check_pid_process(pidfile, find_process=None): :param FilePath pidfile: the file to read/write our PID from. - :param Callable find_process: None, or a custom way to get a - Process objet (usually for tests) - :raises ProcessInTheWay: if a running process exists at our PID """ - find_process = psutil.Process if find_process is None else find_process lock_path = _pidfile_to_lockpath(pidfile) try: @@ -83,7 +79,7 @@ def check_pid_process(pidfile, find_process=None): # instance. Automated programs may use the start-time to # help decide this (if the PID is merely recycled, the # start-time won't match). - find_process(pid) + psutil.Process(pid) raise ProcessInTheWay( "A process is already running as PID {}".format(pid) ) @@ -98,8 +94,7 @@ def check_pid_process(pidfile, find_process=None): pidfile.remove() # write our PID + start-time to the pid-file - pid = os.getpid() - starttime = find_process(pid).create_time() + starttime = psutil.Process().create_time() with pidfile.open("w") as f: f.write("{} {}\n".format(pid, starttime).encode("utf8")) except Timeout: From 0a09d23525fc4be928fa288c1301327d6eaccf32 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 19:29:40 -0600 Subject: [PATCH 189/289] more docstring --- src/allmydata/util/pid.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index d681d819e..f965d72ab 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -105,7 +105,8 @@ def check_pid_process(pidfile): def cleanup_pidfile(pidfile): """ - Safely clean up a PID-file + Remove the pidfile specified (respecting locks). If anything at + all goes wrong, `CannotRemovePidFile` is raised. """ lock_path = _pidfile_to_lockpath(pidfile) with FileLock(lock_path.path): From 6eebbda7c6c06732932c50a96ba1a5315c9d35f4 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 20:07:29 -0600 Subject: [PATCH 190/289] documentation, example code --- docs/check_running.py | 47 +++++++++++++++++++++++++++++++++++++++++++ docs/running.rst | 29 ++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 docs/check_running.py diff --git a/docs/check_running.py b/docs/check_running.py new file mode 100644 index 000000000..ecc55da34 --- /dev/null +++ b/docs/check_running.py @@ -0,0 +1,47 @@ + +import psutil +import filelock + + +def can_spawn_tahoe(pidfile): + """ + Determine if we can spawn a Tahoe-LAFS for the given pidfile. That + pidfile may be deleted if it is stale. + + :param pathlib.Path pidfile: the file to check, that is the Path + to "running.process" in a Tahoe-LAFS configuration directory + + :returns bool: True if we can spawn `tahoe run` here + """ + lockpath = pidfile.parent / (pidfile.name + ".lock") + with filelock.FileLock(lockpath): + try: + with pidfile.open("r") as f: + pid, create_time = f.read().strip().split(" ", 1) + except FileNotFoundError: + return True + + # somewhat interesting: we have a pidfile + pid = int(pid) + create_time = float(create_time) + + try: + proc = psutil.Process(pid) + # most interesting case: there _is_ a process running at the + # recorded PID -- but did it just happen to get that PID, or + # is it the very same one that wrote the file? + if create_time == proc.create_time(): + # _not_ stale! another intance is still running against + # this configuration + return False + + except psutil.NoSuchProcess: + pass + + # the file is stale + pidfile.unlink() + return True + + +from pathlib import Path +print("can spawn?", can_spawn_tahoe(Path("running.process"))) diff --git a/docs/running.rst b/docs/running.rst index 406c8200b..2cff59928 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -124,6 +124,35 @@ Tahoe-LAFS. .. _magic wormhole: https://magic-wormhole.io/ +Multiple Instances +------------------ + +Running multiple instances against the same configuration directory isn't supported. +This will lead to undefined behavior and could corrupt the configuration state. + +We attempt to avoid this situation with a "pidfile"-style file in the config directory called ``running.process``. +There may be a parallel file called ``running.process.lock`` in existence. + +The ``.lock`` file exists to make sure only one process modifies ``running.process`` at once. +The lock file is managed by the `lockfile `_ library. +If you wish to make use of ``running.process`` for any reason you should also lock it and follow the semantics of lockfile. + +If ``running.process` exists it file contains the PID and the creation-time of the process. +When no such file exists, there is no other process running on this configuration. +If there is a ``running.process`` file, it may be a leftover file or it may indicate that another process is running against this config. +To tell the difference, determine if the PID in the file exists currently. +If it does, check the creation-time of the process versus the one in the file. +If these match, there is another process currently running. +Otherwise, the file is stale -- it should be removed before starting Tahoe-LAFS. + +Some example Python code to check the above situations: + +.. literalinclude:: check_running.py + + + + + A note about small grids ------------------------ From 930f4029f370313222b5c0872754f1db16434029 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 20:07:46 -0600 Subject: [PATCH 191/289] properly write pid, create-time --- src/allmydata/util/pid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index f965d72ab..1a833f285 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -94,9 +94,9 @@ def check_pid_process(pidfile): pidfile.remove() # write our PID + start-time to the pid-file - starttime = psutil.Process().create_time() + proc = psutil.Process() with pidfile.open("w") as f: - f.write("{} {}\n".format(pid, starttime).encode("utf8")) + f.write("{} {}\n".format(proc.pid, proc.create_time()).encode("utf8")) except Timeout: raise ProcessInTheWay( "Another process is still locking {}".format(pidfile.path) From 8474ecf83d46a25a473269f4f7907a5eb6e6e552 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 20:15:07 -0600 Subject: [PATCH 192/289] typo --- src/allmydata/util/pid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index 1a833f285..c13dc32f3 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -50,7 +50,7 @@ def parse_pidfile(pidfile): pidfile ) ) - return pid, startime + return pid, starttime def check_pid_process(pidfile): From fedea9696412d7397f58c11ea04a9148c55f8fd8 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 20:26:14 -0600 Subject: [PATCH 193/289] less state --- src/allmydata/test/cli/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index 551164d3c..e84f52096 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -168,7 +168,7 @@ class RunTests(SyncTestCase): config['basedir'] = basedir.path config.twistd_args = [] - from twisted.internet import reactor + reactor = MemoryReactor() runs = [] result_code = run(reactor, config, runApp=runs.append) From 8d8b0e6f01cdd8ab1f64593eda772a8b7db6c3d6 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 20:40:25 -0600 Subject: [PATCH 194/289] cleanup --- src/allmydata/scripts/tahoe_run.py | 6 +----- src/allmydata/util/pid.py | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 4d17492d4..e22e8c307 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -62,13 +62,9 @@ def get_pid_from_pidfile(pidfile): inaccessible, ``-1`` if PID file invalid. """ try: - with open(pidfile, "r") as f: - data = f.read().strip() + pid, _ = parse_pidfile(pidfile) except EnvironmentError: return None - - try: - pid, _ = parse_pidfile(pidfile) except InvalidPidFile: return -1 diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py index c13dc32f3..f12c201d1 100644 --- a/src/allmydata/util/pid.py +++ b/src/allmydata/util/pid.py @@ -1,4 +1,3 @@ -import os import psutil # the docs are a little misleading, but this is either WindowsFileLock From 4f5a1ac37222e51974bcb0a28b5ec9e0e6c0e944 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 21 Sep 2022 23:36:23 -0600 Subject: [PATCH 195/289] naming? --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 5a8311649..962dffd1a 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -638,7 +638,7 @@ class PidFileLocking(SyncTestCase): pidfile = FilePath("foo") lockfile = _pidfile_to_lockpath(pidfile) - with open("code.py", "w") as f: + with open("other_lock.py", "w") as f: f.write( "\n".join([ "import filelock, time", @@ -648,7 +648,7 @@ class PidFileLocking(SyncTestCase): ]) ) proc = Popen( - [sys.executable, "code.py"], + [sys.executable, "other_lock.py"], stdout=PIPE, stderr=PIPE, start_new_session=True, From 8ebe331c358789f3af8dd9d64607ee63404077d7 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 22 Sep 2022 00:11:20 -0600 Subject: [PATCH 196/289] maybe a newline helps --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 962dffd1a..3d8180c7a 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -643,7 +643,7 @@ class PidFileLocking(SyncTestCase): "\n".join([ "import filelock, time", "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), - " print('.', flush=True)", + " print('.\n', flush=True)", " time.sleep(5)", ]) ) @@ -657,7 +657,7 @@ class PidFileLocking(SyncTestCase): # for sure (from the "." it prints) self.assertThat( proc.stdout.read(1), - Equals(b".") + Equals(b".\n") ) # we should not be able to acuire this corresponding lock as well From a182a2507987213b519bcb22c6f49eec0004830c Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 22 Sep 2022 21:43:20 -0600 Subject: [PATCH 197/289] backslashes --- src/allmydata/test/test_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 3d8180c7a..e6b7b746f 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -641,9 +641,10 @@ class PidFileLocking(SyncTestCase): with open("other_lock.py", "w") as f: f.write( "\n".join([ - "import filelock, time", + "import filelock, time, sys", "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), - " print('.\n', flush=True)", + " sys.stdout.write('.\\n')", + " sys.stdout.flush()", " time.sleep(5)", ]) ) @@ -656,7 +657,7 @@ class PidFileLocking(SyncTestCase): # make sure our subprocess has had time to acquire the lock # for sure (from the "." it prints) self.assertThat( - proc.stdout.read(1), + proc.stdout.read(2), Equals(b".\n") ) From 62b92585c62c44694e5db7a8769a772b2d712a07 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 22 Sep 2022 23:57:19 -0600 Subject: [PATCH 198/289] simplify --- src/allmydata/test/test_runner.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index e6b7b746f..5431fbaa9 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -656,12 +656,9 @@ class PidFileLocking(SyncTestCase): ) # make sure our subprocess has had time to acquire the lock # for sure (from the "." it prints) - self.assertThat( - proc.stdout.read(2), - Equals(b".\n") - ) + proc.stdout.read(2), - # we should not be able to acuire this corresponding lock as well + # acquiring the same lock should fail; it is locked by the subprocess with self.assertRaises(ProcessInTheWay): check_pid_process(pidfile) proc.terminate() From 7fdeb8797e8164f1b0fd15ddda4108417545e00d Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 23 Sep 2022 00:26:39 -0600 Subject: [PATCH 199/289] hardcoding bad --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 5431fbaa9..f414ed8b3 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -635,7 +635,7 @@ class PidFileLocking(SyncTestCase): """ # this can't just be "our" process because the locking library # allows the same process to acquire a lock multiple times. - pidfile = FilePath("foo") + pidfile = FilePath(self.mktemp()) lockfile = _pidfile_to_lockpath(pidfile) with open("other_lock.py", "w") as f: From f2cfd96b5e3af0fe82a7bf1ef770cad08d3969cd Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 23 Sep 2022 01:04:58 -0600 Subject: [PATCH 200/289] typo, longer timeout --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index f414ed8b3..b80891642 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -645,7 +645,7 @@ class PidFileLocking(SyncTestCase): "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), " sys.stdout.write('.\\n')", " sys.stdout.flush()", - " time.sleep(5)", + " time.sleep(10)", ]) ) proc = Popen( @@ -656,7 +656,7 @@ class PidFileLocking(SyncTestCase): ) # make sure our subprocess has had time to acquire the lock # for sure (from the "." it prints) - proc.stdout.read(2), + proc.stdout.read(2) # acquiring the same lock should fail; it is locked by the subprocess with self.assertRaises(ProcessInTheWay): From 8991509f8c82642d75e3070ad7ae02bfe061977d Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 25 Sep 2022 00:16:40 -0600 Subject: [PATCH 201/289] blackslashes.... --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index b80891642..f8211ec02 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -642,7 +642,7 @@ class PidFileLocking(SyncTestCase): f.write( "\n".join([ "import filelock, time, sys", - "with filelock.FileLock('{}', timeout=1):".format(lockfile.path), + "with filelock.FileLock(r'{}', timeout=1):".format(lockfile.path), " sys.stdout.write('.\\n')", " sys.stdout.flush()", " time.sleep(10)", From d42c00ae9293dd18c9f1efd22e86984c4725f222 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 25 Sep 2022 00:46:30 -0600 Subject: [PATCH 202/289] do all checks with lock --- docs/check_running.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/check_running.py b/docs/check_running.py index ecc55da34..55aae0015 100644 --- a/docs/check_running.py +++ b/docs/check_running.py @@ -21,22 +21,22 @@ def can_spawn_tahoe(pidfile): except FileNotFoundError: return True - # somewhat interesting: we have a pidfile - pid = int(pid) - create_time = float(create_time) + # somewhat interesting: we have a pidfile + pid = int(pid) + create_time = float(create_time) - try: - proc = psutil.Process(pid) - # most interesting case: there _is_ a process running at the - # recorded PID -- but did it just happen to get that PID, or - # is it the very same one that wrote the file? - if create_time == proc.create_time(): - # _not_ stale! another intance is still running against - # this configuration - return False + try: + proc = psutil.Process(pid) + # most interesting case: there _is_ a process running at the + # recorded PID -- but did it just happen to get that PID, or + # is it the very same one that wrote the file? + if create_time == proc.create_time(): + # _not_ stale! another intance is still running against + # this configuration + return False - except psutil.NoSuchProcess: - pass + except psutil.NoSuchProcess: + pass # the file is stale pidfile.unlink() From d16d233872df95b8e3876e3aa32e0fdb30cc9f98 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 25 Sep 2022 00:47:58 -0600 Subject: [PATCH 203/289] wording --- docs/running.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running.rst b/docs/running.rst index 2cff59928..b487f4ae3 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -128,7 +128,7 @@ Multiple Instances ------------------ Running multiple instances against the same configuration directory isn't supported. -This will lead to undefined behavior and could corrupt the configuration state. +This will lead to undefined behavior and could corrupt the configuration or state. We attempt to avoid this situation with a "pidfile"-style file in the config directory called ``running.process``. There may be a parallel file called ``running.process.lock`` in existence. From 4919b6d9066a10028e6548800a589929c3c094d9 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 09:34:36 -0600 Subject: [PATCH 204/289] typo Co-authored-by: Jean-Paul Calderone --- docs/running.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running.rst b/docs/running.rst index b487f4ae3..29df15a3c 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -137,7 +137,7 @@ The ``.lock`` file exists to make sure only one process modifies ``running.proce The lock file is managed by the `lockfile `_ library. If you wish to make use of ``running.process`` for any reason you should also lock it and follow the semantics of lockfile. -If ``running.process` exists it file contains the PID and the creation-time of the process. +If ``running.process`` exists then it contains the PID and the creation-time of the process. When no such file exists, there is no other process running on this configuration. If there is a ``running.process`` file, it may be a leftover file or it may indicate that another process is running against this config. To tell the difference, determine if the PID in the file exists currently. From 7aae2f78575541799002645e85a3aeab9f8706c2 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 09:34:54 -0600 Subject: [PATCH 205/289] Clarify Co-authored-by: Jean-Paul Calderone --- docs/running.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running.rst b/docs/running.rst index 29df15a3c..263448735 100644 --- a/docs/running.rst +++ b/docs/running.rst @@ -142,7 +142,7 @@ When no such file exists, there is no other process running on this configuratio If there is a ``running.process`` file, it may be a leftover file or it may indicate that another process is running against this config. To tell the difference, determine if the PID in the file exists currently. If it does, check the creation-time of the process versus the one in the file. -If these match, there is another process currently running. +If these match, there is another process currently running and using this config. Otherwise, the file is stale -- it should be removed before starting Tahoe-LAFS. Some example Python code to check the above situations: From a7398e13f7c82707738c3862cb085d7e2a055bb2 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 09:35:17 -0600 Subject: [PATCH 206/289] Update docs/check_running.py Co-authored-by: Jean-Paul Calderone --- docs/check_running.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/check_running.py b/docs/check_running.py index 55aae0015..2705f1721 100644 --- a/docs/check_running.py +++ b/docs/check_running.py @@ -38,9 +38,9 @@ def can_spawn_tahoe(pidfile): except psutil.NoSuchProcess: pass - # the file is stale - pidfile.unlink() - return True + # the file is stale + pidfile.unlink() + return True from pathlib import Path From ca522a5293c8f2e38e9b8d2071fc3865907f4177 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 10:07:44 -0600 Subject: [PATCH 207/289] sys.argv not inline --- src/allmydata/test/test_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index f8211ec02..00c87ce08 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -642,14 +642,14 @@ class PidFileLocking(SyncTestCase): f.write( "\n".join([ "import filelock, time, sys", - "with filelock.FileLock(r'{}', timeout=1):".format(lockfile.path), + "with filelock.FileLock(sys.argv[1], timeout=1):", " sys.stdout.write('.\\n')", " sys.stdout.flush()", " time.sleep(10)", ]) ) proc = Popen( - [sys.executable, "other_lock.py"], + [sys.executable, "other_lock.py", lockfile.path], stdout=PIPE, stderr=PIPE, start_new_session=True, From bef71978b6e7181598fc30f1b94d56b0b7e6a7c5 Mon Sep 17 00:00:00 2001 From: meejah Date: Wed, 28 Sep 2022 10:08:13 -0600 Subject: [PATCH 208/289] don't need start_new_session --- src/allmydata/test/test_runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 00c87ce08..74e3f803e 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -652,7 +652,6 @@ class PidFileLocking(SyncTestCase): [sys.executable, "other_lock.py", lockfile.path], stdout=PIPE, stderr=PIPE, - start_new_session=True, ) # make sure our subprocess has had time to acquire the lock # for sure (from the "." it prints) From 2a3b110d53146d86709a8e161d90e65d6a07f0fe Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 30 Sep 2022 16:48:23 -0600 Subject: [PATCH 209/289] simple build automation --- Makefile | 44 ++++++++++++++++++++++++++++++++++++++++++++ setup.py | 4 ++++ 2 files changed, 48 insertions(+) diff --git a/Makefile b/Makefile index 5cbd863a3..6dd2b743b 100644 --- a/Makefile +++ b/Makefile @@ -224,3 +224,47 @@ src/allmydata/_version.py: .tox/create-venvs.log: tox.ini setup.py tox --notest -p all | tee -a "$(@)" + + +# Make a new release. TODO: +# - clean checkout necessary? garbage in tarball? +release: + @echo "Is checkout clean?" + git diff-files --quiet + git diff-index --quiet --cached HEAD -- + + @echo "Install required build software" + python3 -m pip install --editable .[build] + + @echo "Test README" + python3 setup.py check -r -s + + @echo "Update NEWS" + python3 -m towncrier build --yes --version `python3 misc/build_helpers/update-version.py --no-tag` + git add -u + git commit -m "update NEWS for release" + + @echo "Bump version and create tag" + python3 misc/build_helpers/update-version.py + + @echo "Build and sign wheel" + python3 setup.py bdist_wheel + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl + ls dist/*`git describe --abbrev=0`* + + @echo "Build and sign source-dist" + python3 setup.py sdist + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz + ls dist/*`git describe --abbrev=0`* + +release-test: + gpg --verify dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc + gpg --verify dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc + virtualenv testmf_venv + testmf_venv/bin/pip install dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl + testmf_venv/bin/tahoe-lafs --version +# ... + rm -rf testmf_venv + +release-upload: + twine upload dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc diff --git a/setup.py b/setup.py index d99831347..ffe23a7b5 100644 --- a/setup.py +++ b/setup.py @@ -380,6 +380,10 @@ setup(name="tahoe-lafs", # also set in __init__.py # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2392 for some # discussion. ':sys_platform=="win32"': ["pywin32 != 226"], + "build": [ + "dulwich", + "gpg", + ], "test": [ "flake8", # Pin a specific pyflakes so we don't have different folks From 4b708d87bd0bd6d07517a113c065e0f0329b8d34 Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 30 Sep 2022 16:53:48 -0600 Subject: [PATCH 210/289] wip --- Makefile | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 6dd2b743b..66f1819ad 100644 --- a/Makefile +++ b/Makefile @@ -239,6 +239,9 @@ release: @echo "Test README" python3 setup.py check -r -s +# XXX make branch, based on a ticket (provided how?) +# XXX or, specify that "make release" must run on such a branch "XXXX.tahoe-release" + @echo "Update NEWS" python3 -m towncrier build --yes --version `python3 misc/build_helpers/update-version.py --no-tag` git add -u @@ -249,22 +252,22 @@ release: @echo "Build and sign wheel" python3 setup.py bdist_wheel - gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl - ls dist/*`git describe --abbrev=0`* + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl + ls dist/*`git describe | cut -b 12-`* @echo "Build and sign source-dist" python3 setup.py sdist - gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz - ls dist/*`git describe --abbrev=0`* + gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz + ls dist/*`git describe | cut -b 12-`* release-test: - gpg --verify dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc - gpg --verify dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc + gpg --verify dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc + gpg --verify dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc virtualenv testmf_venv - testmf_venv/bin/pip install dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl + testmf_venv/bin/pip install dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl testmf_venv/bin/tahoe-lafs --version # ... rm -rf testmf_venv release-upload: - twine upload dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl dist/tahoe_lafs-`git describe --abbrev=0`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz dist/tahoe-lafs-`git describe --abbrev=0`.tar.gz.asc + twine upload dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc From 4137d6ebb7b73de4782e0d332485684cf3585376 Mon Sep 17 00:00:00 2001 From: meejah Date: Fri, 30 Sep 2022 17:20:19 -0600 Subject: [PATCH 211/289] proper smoke-test --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 66f1819ad..5ad676e86 100644 --- a/Makefile +++ b/Makefile @@ -260,13 +260,13 @@ release: gpg --pinentry=loopback -u meejah@meejah.ca --armor --detach-sign dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz ls dist/*`git describe | cut -b 12-`* +# basically just a bare-minimum smoke-test that it installs and runs release-test: gpg --verify dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc gpg --verify dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc virtualenv testmf_venv testmf_venv/bin/pip install dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl - testmf_venv/bin/tahoe-lafs --version -# ... + testmf_venv/bin/tahoe --version rm -rf testmf_venv release-upload: From 923f456d6e9f53ecb6db67c73a999f72027b2655 Mon Sep 17 00:00:00 2001 From: meejah Date: Sat, 1 Oct 2022 14:47:19 -0600 Subject: [PATCH 212/289] all upload steps --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 5ad676e86..b68b788ca 100644 --- a/Makefile +++ b/Makefile @@ -270,4 +270,6 @@ release-test: rm -rf testmf_venv release-upload: + scp dist/*`git describe | cut -b 12-`* meejah@tahoe-lafs.org:/home/source/downloads + git push origin_push tahoe-lafs-`git describe | cut -b 12-` twine upload dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl dist/tahoe_lafs-`git describe | cut -b 12-`-py3-none-any.whl.asc dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz dist/tahoe-lafs-`git describe | cut -b 12-`.tar.gz.asc From c711b5b0a9c825d6e1bfb3a30437da380a63b422 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 13:33:05 -0600 Subject: [PATCH 213/289] clean docs --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index b68b788ca..8b34fd0e1 100644 --- a/Makefile +++ b/Makefile @@ -233,6 +233,9 @@ release: git diff-files --quiet git diff-index --quiet --cached HEAD -- + @echo "Clean docs build area" + rm -rf docs/_build/ + @echo "Install required build software" python3 -m pip install --editable .[build] From 3d3dc187646f0ba2203f73eecf2c927147400884 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 14:34:42 -0600 Subject: [PATCH 214/289] better instructions --- Makefile | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8b34fd0e1..c501ba3c5 100644 --- a/Makefile +++ b/Makefile @@ -226,8 +226,16 @@ src/allmydata/_version.py: tox --notest -p all | tee -a "$(@)" -# Make a new release. TODO: -# - clean checkout necessary? garbage in tarball? +# to make a new release: +# - create a ticket for the release in Trac +# - ensure local copy is up-to-date +# - create a branch like "XXXX.release" from up-to-date master +# - in the branch, run "make release" +# - run "make release-test" +# - perform any other sanity-checks on the release +# - run "make release-upload" +# Note that several commands below hard-code "meejah"; if you are +# someone else please adjust them. release: @echo "Is checkout clean?" git diff-files --quiet From a22be070b8ff9b4e05af9f28e61d209f64fcdeb2 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 14:51:29 -0600 Subject: [PATCH 215/289] version-updating script --- misc/build_helpers/update-version.py | 96 ++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 misc/build_helpers/update-version.py diff --git a/misc/build_helpers/update-version.py b/misc/build_helpers/update-version.py new file mode 100644 index 000000000..38baf7c7c --- /dev/null +++ b/misc/build_helpers/update-version.py @@ -0,0 +1,96 @@ +# +# this updates the (tagged) version of the software +# +# Any "options" are hard-coded in here (e.g. the GnuPG key to use) +# + +author = "meejah " + + +import sys +import time +import itertools +from datetime import datetime +from packaging.version import Version + +from dulwich.repo import Repo +from dulwich.porcelain import ( + tag_list, + tag_create, + status, +) + +from twisted.internet.task import ( + react, +) +from twisted.internet.defer import ( + ensureDeferred, +) + + +def existing_tags(git): + versions = sorted( + Version(v.decode("utf8").lstrip("tahoe-lafs-")) + for v in tag_list(git) + if v.startswith(b"tahoe-lafs-") + ) + return versions + + +def create_new_version(git): + versions = existing_tags(git) + biggest = versions[-1] + + return Version( + "{}.{}.{}".format( + biggest.major, + biggest.minor + 1, + 0, + ) + ) + + +async def main(reactor): + git = Repo(".") + + st = status(git) + if any(st.staged.values()) or st.unstaged: + print("unclean checkout; aborting") + raise SystemExit(1) + + v = create_new_version(git) + if "--no-tag" in sys.argv: + print(v) + return + + print("Existing tags: {}".format("\n".join(str(x) for x in existing_tags(git)))) + print("New tag will be {}".format(v)) + + # the "tag time" is seconds from the epoch .. we quantize these to + # the start of the day in question, in UTC. + now = datetime.now() + s = now.utctimetuple() + ts = int( + time.mktime( + time.struct_time((s.tm_year, s.tm_mon, s.tm_mday, 0, 0, 0, 0, s.tm_yday, 0)) + ) + ) + tag_create( + repo=git, + tag="tahoe-lafs-{}".format(str(v)).encode("utf8"), + author=author.encode("utf8"), + message="Release {}".format(v).encode("utf8"), + annotated=True, + objectish=b"HEAD", + sign=author.encode("utf8"), + tag_time=ts, + tag_timezone=0, + ) + + print("Tag created locally, it is not pushed") + print("To push it run something like:") + print(" git push origin {}".format(v)) + + +if __name__ == "__main__": + react(lambda r: ensureDeferred(main(r))) From 1af48672e39ab6430583dbd38fcfe4fa61821d09 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 14:53:03 -0600 Subject: [PATCH 216/289] correct notes --- Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index c501ba3c5..c02184a36 100644 --- a/Makefile +++ b/Makefile @@ -250,14 +250,13 @@ release: @echo "Test README" python3 setup.py check -r -s -# XXX make branch, based on a ticket (provided how?) -# XXX or, specify that "make release" must run on such a branch "XXXX.tahoe-release" - @echo "Update NEWS" python3 -m towncrier build --yes --version `python3 misc/build_helpers/update-version.py --no-tag` git add -u git commit -m "update NEWS for release" +# note that this always bumps the "middle" number, e.g. from 1.17.1 -> 1.18.0 +# and produces a tag into the Git repository @echo "Bump version and create tag" python3 misc/build_helpers/update-version.py From 6bb46a832bfde84714d35625a256265e93688684 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 18:52:57 -0600 Subject: [PATCH 217/289] flake8 --- misc/build_helpers/update-version.py | 1 - 1 file changed, 1 deletion(-) diff --git a/misc/build_helpers/update-version.py b/misc/build_helpers/update-version.py index 38baf7c7c..75b22edae 100644 --- a/misc/build_helpers/update-version.py +++ b/misc/build_helpers/update-version.py @@ -9,7 +9,6 @@ author = "meejah " import sys import time -import itertools from datetime import datetime from packaging.version import Version From 402d80710caa5aa50f0a5bef79ae979a75dc3594 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 19:03:10 -0600 Subject: [PATCH 218/289] news --- newsfragments/3846.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3846.feature diff --git a/newsfragments/3846.feature b/newsfragments/3846.feature new file mode 100644 index 000000000..fd321eaf0 --- /dev/null +++ b/newsfragments/3846.feature @@ -0,0 +1 @@ +"make" based release automation From e8e43d2100f1e8dfc6bd421dffd3824e57b903d0 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 2 Oct 2022 19:05:16 -0600 Subject: [PATCH 219/289] update NEWS for release --- NEWS.rst | 41 +++++++++++++++++++++++++++++++++++++ newsfragments/3327.minor | 0 newsfragments/3526.minor | 1 - newsfragments/3697.minor | 1 - newsfragments/3709.minor | 0 newsfragments/3786.minor | 1 - newsfragments/3788.minor | 0 newsfragments/3802.minor | 0 newsfragments/3816.minor | 0 newsfragments/3828.feature | 8 -------- newsfragments/3846.feature | 1 - newsfragments/3855.minor | 0 newsfragments/3858.minor | 0 newsfragments/3859.minor | 0 newsfragments/3860.minor | 0 newsfragments/3865.incompat | 1 - newsfragments/3867.minor | 0 newsfragments/3868.minor | 0 newsfragments/3871.minor | 0 newsfragments/3872.minor | 0 newsfragments/3873.incompat | 1 - newsfragments/3875.minor | 0 newsfragments/3876.minor | 0 newsfragments/3877.minor | 0 newsfragments/3879.incompat | 1 - newsfragments/3881.minor | 0 newsfragments/3882.minor | 0 newsfragments/3883.minor | 0 newsfragments/3889.minor | 0 newsfragments/3890.minor | 0 newsfragments/3891.minor | 0 newsfragments/3893.minor | 0 newsfragments/3895.minor | 0 newsfragments/3896.minor | 0 newsfragments/3898.minor | 0 newsfragments/3900.minor | 0 newsfragments/3909.minor | 0 newsfragments/3913.minor | 0 newsfragments/3915.minor | 0 newsfragments/3916.minor | 0 newsfragments/3926.incompat | 10 --------- 41 files changed, 41 insertions(+), 25 deletions(-) delete mode 100644 newsfragments/3327.minor delete mode 100644 newsfragments/3526.minor delete mode 100644 newsfragments/3697.minor delete mode 100644 newsfragments/3709.minor delete mode 100644 newsfragments/3786.minor delete mode 100644 newsfragments/3788.minor delete mode 100644 newsfragments/3802.minor delete mode 100644 newsfragments/3816.minor delete mode 100644 newsfragments/3828.feature delete mode 100644 newsfragments/3846.feature delete mode 100644 newsfragments/3855.minor delete mode 100644 newsfragments/3858.minor delete mode 100644 newsfragments/3859.minor delete mode 100644 newsfragments/3860.minor delete mode 100644 newsfragments/3865.incompat delete mode 100644 newsfragments/3867.minor delete mode 100644 newsfragments/3868.minor delete mode 100644 newsfragments/3871.minor delete mode 100644 newsfragments/3872.minor delete mode 100644 newsfragments/3873.incompat delete mode 100644 newsfragments/3875.minor delete mode 100644 newsfragments/3876.minor delete mode 100644 newsfragments/3877.minor delete mode 100644 newsfragments/3879.incompat delete mode 100644 newsfragments/3881.minor delete mode 100644 newsfragments/3882.minor delete mode 100644 newsfragments/3883.minor delete mode 100644 newsfragments/3889.minor delete mode 100644 newsfragments/3890.minor delete mode 100644 newsfragments/3891.minor delete mode 100644 newsfragments/3893.minor delete mode 100644 newsfragments/3895.minor delete mode 100644 newsfragments/3896.minor delete mode 100644 newsfragments/3898.minor delete mode 100644 newsfragments/3900.minor delete mode 100644 newsfragments/3909.minor delete mode 100644 newsfragments/3913.minor delete mode 100644 newsfragments/3915.minor delete mode 100644 newsfragments/3916.minor delete mode 100644 newsfragments/3926.incompat diff --git a/NEWS.rst b/NEWS.rst index 0f9194cc4..7b1fadb8a 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -5,6 +5,47 @@ User-Visible Changes in Tahoe-LAFS ================================== .. towncrier start line +Release 1.18.0 (2022-10-02) +''''''''''''''''''''''''''' + +Backwards Incompatible Changes +------------------------------ + +- Python 3.6 is no longer supported, as it has reached end-of-life and is no longer receiving security updates. (`#3865 `_) +- Python 3.7 or later is now required; Python 2 is no longer supported. (`#3873 `_) +- Share corruption reports stored on disk are now always encoded in UTF-8. (`#3879 `_) +- Record both the PID and the process creation-time: + + a new kind of pidfile in `running.process` records both + the PID and the creation-time of the process. This facilitates + automatic discovery of a "stale" pidfile that points to a + currently-running process. If the recorded creation-time matches + the creation-time of the running process, then it is a still-running + `tahoe run` process. Otherwise, the file is stale. + + The `twistd.pid` file is no longer present. (`#3926 `_) + + +Features +-------- + +- The implementation of SDMF and MDMF (mutables) now requires RSA keys to be exactly 2048 bits, aligning them with the specification. + + Some code existed to allow tests to shorten this and it's + conceptually possible a modified client produced mutables + with different key-sizes. However, the spec says that they + must be 2048 bits. If you happen to have a capability with + a key-size different from 2048 you may use 1.17.1 or earlier + to read the content. (`#3828 `_) +- "make" based release automation (`#3846 `_) + + +Misc/Other +---------- + +- `#3327 `_, `#3526 `_, `#3697 `_, `#3709 `_, `#3786 `_, `#3788 `_, `#3802 `_, `#3816 `_, `#3855 `_, `#3858 `_, `#3859 `_, `#3860 `_, `#3867 `_, `#3868 `_, `#3871 `_, `#3872 `_, `#3875 `_, `#3876 `_, `#3877 `_, `#3881 `_, `#3882 `_, `#3883 `_, `#3889 `_, `#3890 `_, `#3891 `_, `#3893 `_, `#3895 `_, `#3896 `_, `#3898 `_, `#3900 `_, `#3909 `_, `#3913 `_, `#3915 `_, `#3916 `_ + + Release 1.17.1 (2022-01-07) ''''''''''''''''''''''''''' diff --git a/newsfragments/3327.minor b/newsfragments/3327.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3526.minor b/newsfragments/3526.minor deleted file mode 100644 index 8b1378917..000000000 --- a/newsfragments/3526.minor +++ /dev/null @@ -1 +0,0 @@ - diff --git a/newsfragments/3697.minor b/newsfragments/3697.minor deleted file mode 100644 index 0977d8a6f..000000000 --- a/newsfragments/3697.minor +++ /dev/null @@ -1 +0,0 @@ -Added support for Python 3.10. Added support for PyPy3 (3.7 and 3.8, on Linux only). \ No newline at end of file diff --git a/newsfragments/3709.minor b/newsfragments/3709.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3786.minor b/newsfragments/3786.minor deleted file mode 100644 index ecd1a2c4e..000000000 --- a/newsfragments/3786.minor +++ /dev/null @@ -1 +0,0 @@ -Added re-structured text documentation for the OpenMetrics format statistics endpoint. diff --git a/newsfragments/3788.minor b/newsfragments/3788.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3802.minor b/newsfragments/3802.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3816.minor b/newsfragments/3816.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3828.feature b/newsfragments/3828.feature deleted file mode 100644 index d396439b0..000000000 --- a/newsfragments/3828.feature +++ /dev/null @@ -1,8 +0,0 @@ -The implementation of SDMF and MDMF (mutables) now requires RSA keys to be exactly 2048 bits, aligning them with the specification. - -Some code existed to allow tests to shorten this and it's -conceptually possible a modified client produced mutables -with different key-sizes. However, the spec says that they -must be 2048 bits. If you happen to have a capability with -a key-size different from 2048 you may use 1.17.1 or earlier -to read the content. diff --git a/newsfragments/3846.feature b/newsfragments/3846.feature deleted file mode 100644 index fd321eaf0..000000000 --- a/newsfragments/3846.feature +++ /dev/null @@ -1 +0,0 @@ -"make" based release automation diff --git a/newsfragments/3855.minor b/newsfragments/3855.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3858.minor b/newsfragments/3858.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3859.minor b/newsfragments/3859.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3860.minor b/newsfragments/3860.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3865.incompat b/newsfragments/3865.incompat deleted file mode 100644 index 59381b269..000000000 --- a/newsfragments/3865.incompat +++ /dev/null @@ -1 +0,0 @@ -Python 3.6 is no longer supported, as it has reached end-of-life and is no longer receiving security updates. \ No newline at end of file diff --git a/newsfragments/3867.minor b/newsfragments/3867.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3868.minor b/newsfragments/3868.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3871.minor b/newsfragments/3871.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3872.minor b/newsfragments/3872.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3873.incompat b/newsfragments/3873.incompat deleted file mode 100644 index da8a5fb0e..000000000 --- a/newsfragments/3873.incompat +++ /dev/null @@ -1 +0,0 @@ -Python 3.7 or later is now required; Python 2 is no longer supported. \ No newline at end of file diff --git a/newsfragments/3875.minor b/newsfragments/3875.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3876.minor b/newsfragments/3876.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3877.minor b/newsfragments/3877.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3879.incompat b/newsfragments/3879.incompat deleted file mode 100644 index ca3f24f94..000000000 --- a/newsfragments/3879.incompat +++ /dev/null @@ -1 +0,0 @@ -Share corruption reports stored on disk are now always encoded in UTF-8. \ No newline at end of file diff --git a/newsfragments/3881.minor b/newsfragments/3881.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3882.minor b/newsfragments/3882.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3883.minor b/newsfragments/3883.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3889.minor b/newsfragments/3889.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3890.minor b/newsfragments/3890.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3891.minor b/newsfragments/3891.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3893.minor b/newsfragments/3893.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3895.minor b/newsfragments/3895.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3896.minor b/newsfragments/3896.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3898.minor b/newsfragments/3898.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3900.minor b/newsfragments/3900.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3909.minor b/newsfragments/3909.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3913.minor b/newsfragments/3913.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3915.minor b/newsfragments/3915.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3916.minor b/newsfragments/3916.minor deleted file mode 100644 index e69de29bb..000000000 diff --git a/newsfragments/3926.incompat b/newsfragments/3926.incompat deleted file mode 100644 index 674ad289c..000000000 --- a/newsfragments/3926.incompat +++ /dev/null @@ -1,10 +0,0 @@ -Record both the PID and the process creation-time: - -a new kind of pidfile in `running.process` records both -the PID and the creation-time of the process. This facilitates -automatic discovery of a "stale" pidfile that points to a -currently-running process. If the recorded creation-time matches -the creation-time of the running process, then it is a still-running -`tahoe run` process. Otherwise, the file is stale. - -The `twistd.pid` file is no longer present. \ No newline at end of file From a53420c1931b4ec9c6a40f5105a44d7d4ac0f846 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 10:49:01 -0400 Subject: [PATCH 220/289] Use known working version of i2pd. --- integration/test_i2p.py | 2 +- newsfragments/3928.minor | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 newsfragments/3928.minor diff --git a/integration/test_i2p.py b/integration/test_i2p.py index f0b06f1e2..97abb40a5 100644 --- a/integration/test_i2p.py +++ b/integration/test_i2p.py @@ -55,7 +55,7 @@ def i2p_network(reactor, temp_dir, request): proto, which("docker"), ( - "docker", "run", "-p", "7656:7656", "purplei2p/i2pd", + "docker", "run", "-p", "7656:7656", "purplei2p/i2pd:release-2.43.0", # Bad URL for reseeds, so it can't talk to other routers. "--reseed.urls", "http://localhost:1/", ), diff --git a/newsfragments/3928.minor b/newsfragments/3928.minor new file mode 100644 index 000000000..e69de29bb From ec15d58e10356016130cb7eaf97681584540a611 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 10:49:08 -0400 Subject: [PATCH 221/289] Actually clean up the container. --- integration/test_i2p.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/test_i2p.py b/integration/test_i2p.py index 97abb40a5..15f9d73cf 100644 --- a/integration/test_i2p.py +++ b/integration/test_i2p.py @@ -63,7 +63,7 @@ def i2p_network(reactor, temp_dir, request): def cleanup(): try: - proto.transport.signalProcess("KILL") + proto.transport.signalProcess("INT") util.block_with_timeout(proto.exited, reactor) except ProcessExitedAlready: pass From b86f99f0ebaa5d0239d18498f59f412967bf0b27 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:00:34 -0400 Subject: [PATCH 222/289] Make this more accurate given changes in spec. --- docs/specifications/url.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 39a830e5a..fe756208b 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -87,11 +87,13 @@ These differences are separated into distinct versions. Version 0 --------- -A Foolscap fURL is considered the canonical definition of a version 0 NURL. +In theory, a Foolscap fURL with a single netloc is considered the canonical definition of a version 0 NURL. Notably, the hash component is defined as the base32-encoded SHA1 hash of the DER form of an x509v3 certificate. A version 0 NURL is identified by the absence of the ``v=1`` fragment. +In practice, real world fURLs may have more than one netloc, so lack of version fragment will likely just involve dispatching the fURL to a different parser. + Examples ~~~~~~~~ @@ -119,7 +121,7 @@ The hash component of a version 1 NURL differs in three ways from the prior vers *all* certificate fields should be considered within the context of the relationship identified by the SPKI hash. 3. The hash is encoded using urlsafe-base64 (without padding) instead of base32. - This provides a more compact representation and minimizes the usability impacts of switching from a 160 bit hash to a 224 bit hash. + This provides a more compact representation and minimizes the usability impacts of switching from a 160 bit hash to a 256 bit hash. A version 1 NURL is identified by the presence of the ``v=1`` fragment. Though the length of the hash string (38 bytes) could also be used to differentiate it from a version 0 NURL, From b0fb72e379bcbfaabb2ae37452d9a68dd481cbea Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:02:48 -0400 Subject: [PATCH 223/289] Link to design issue. --- src/allmydata/client.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 9938ec076..ac8b03e2f 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -591,6 +591,10 @@ def anonymous_storage_enabled(config): @implementer(IStatsProducer) class _Client(node.Node, pollmixin.PollMixin): + """ + This class should be refactored; see + https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931 + """ STOREDIR = 'storage' NODETYPE = "client" @@ -661,7 +665,9 @@ class _Client(node.Node, pollmixin.PollMixin): # TODO this may be the wrong location for now? but as temporary measure # it allows us to get NURLs for testing in test_istorageserver.py. This # will eventually get fixed one way or another in - # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901 + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901. See also + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931 for the bigger + # picture issue. self.storage_nurls = set() def init_stats_provider(self): From d753bb58da880a00724bd0a9c592803ee7983fca Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:05:56 -0400 Subject: [PATCH 224/289] Better type for storage_nurls. --- src/allmydata/client.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index ac8b03e2f..a31d05b9c 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -1,17 +1,9 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 - # Don't use future str to prevent leaking future's newbytes into foolscap, which they break. - from past.builtins import unicode as str +from __future__ import annotations +from typing import Optional import os, stat, time, weakref from base64 import urlsafe_b64encode from functools import partial @@ -668,7 +660,7 @@ class _Client(node.Node, pollmixin.PollMixin): # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3901. See also # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3931 for the bigger # picture issue. - self.storage_nurls = set() + self.storage_nurls : Optional[set] = None def init_stats_provider(self): self.stats_provider = StatsProvider(self) @@ -831,8 +823,8 @@ class _Client(node.Node, pollmixin.PollMixin): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) (_, _, swissnum) = furl.rpartition("/") - self.storage_nurls.update( - self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) + self.storage_nurls = self.tub.negotiationClass.add_storage_server( + ss, swissnum.encode("ascii") ) announcement["anonymous-storage-FURL"] = furl From d918135a0d016f579073dac7236a3aadfab76bbf Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:10:36 -0400 Subject: [PATCH 225/289] Use parser instead of ad-hoc parser. --- src/allmydata/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index a31d05b9c..417dffed8 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -822,7 +822,7 @@ class _Client(node.Node, pollmixin.PollMixin): if anonymous_storage_enabled(self.config): furl_file = self.config.get_private_path("storage.furl").encode(get_filesystem_encoding()) furl = self.tub.registerReference(FoolscapStorageServer(ss), furlFile=furl_file) - (_, _, swissnum) = furl.rpartition("/") + (_, _, swissnum) = decode_furl(furl) self.storage_nurls = self.tub.negotiationClass.add_storage_server( ss, swissnum.encode("ascii") ) From 5d53cd4a170cd7315b594102f705fcb9e7eec55e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:16:30 -0400 Subject: [PATCH 226/289] Nicer API. --- src/allmydata/node.py | 8 +++++--- src/allmydata/protocol_switch.py | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 597221e9b..d6cbc9e36 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -55,7 +55,7 @@ from allmydata.util.yamlutil import ( from . import ( __full_version__, ) -from .protocol_switch import support_foolscap_and_https +from .protocol_switch import create_tub_with_https_support def _common_valid_config(): @@ -708,8 +708,10 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han :param dict tub_options: every key-value pair in here will be set in the new Tub via `Tub.setOption` """ - tub = Tub(**kwargs) - support_foolscap_and_https(tub) + # We listen simulataneously for both Foolscap and HTTPS on the same port, + # so we have to create a special Foolscap Tub for that to work: + tub = create_tub_with_https_support(**kwargs) + for (name, value) in list(tub_options.items()): tub.setOption(name, value) handlers = default_connection_handlers.copy() diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index a17f3055c..2b4ce6da1 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -6,10 +6,11 @@ simple as possible, with no extra configuration needed. Listening on the same port means a user upgrading Tahoe-LAFS will automatically get HTTPS working with no additional changes. -Use ``support_foolscap_and_https()`` to create a new subclass for a ``Tub`` -instance, and then ``add_storage_server()`` on the resulting class to add the -relevant information for a storage server once it becomes available later in -the configuration process. +Use ``create_tub_with_https_support()`` creates a new ``Tub`` that has its +``negotiationClass`` modified to be a new subclass tied to that specific +``Tub`` instance. Calling ``tub.negotiationClass.add_storage_server(...)`` +then adds relevant information for a storage server once it becomes available +later in the configuration process. """ from __future__ import annotations @@ -193,14 +194,17 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): self.__dict__ = protocol.__dict__ -def support_foolscap_and_https(tub: Tub): +def create_tub_with_https_support(**kwargs) -> Tub: """ - Create a new Foolscap-or-HTTPS protocol class for a specific ``Tub`` + Create a new Tub that also supports HTTPS. + + This involves creating a new protocol switch class for the specific ``Tub`` instance. """ - the_tub = tub + the_tub = Tub(**kwargs) class FoolscapOrHttpForTub(_FoolscapOrHttps): tub = the_tub - tub.negotiationClass = FoolscapOrHttpForTub # type: ignore + the_tub.negotiationClass = FoolscapOrHttpForTub # type: ignore + return the_tub From 3034f35c7b1e1748a5d4a76f73585ced7fc1e2ff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:21:54 -0400 Subject: [PATCH 227/289] Document type expectations. --- src/allmydata/storage/http_server.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 540675cc7..eefb9b906 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -4,7 +4,7 @@ HTTP server for storage. from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any, Callable, Union +from typing import Dict, List, Set, Tuple, Any, Callable, Union, cast from functools import wraps from base64 import b64decode import binascii @@ -19,6 +19,7 @@ from twisted.internet.interfaces import ( IStreamServerEndpoint, IPullProducer, ) +from twisted.internet.address import IPv4Address, IPv6Address from twisted.internet.defer import Deferred from twisted.internet.ssl import CertificateOptions, Certificate, PrivateCertificate from twisted.web.server import Site, Request @@ -911,9 +912,10 @@ def listen_tls( endpoint = _TLSEndpointWrapper.from_paths(endpoint, private_key_path, cert_path) def get_nurl(listening_port: IListeningPort) -> DecodedURL: + address = cast(Union[IPv4Address, IPv6Address], listening_port.getHost()) return build_nurl( hostname, - listening_port.getHost().port, + address.port, str(server._swissnum, "ascii"), load_pem_x509_certificate(cert_path.getContent()), ) From 58247799c1e5f12a2692be0dc72325484a38a6f6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:27:19 -0400 Subject: [PATCH 228/289] Fix remaining references to refactored-out-of-existence API. --- src/allmydata/protocol_switch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 2b4ce6da1..b0af84c33 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -53,13 +53,13 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): since these are created by Foolscap's ``Tub``, by setting this to be the tub's ``negotiationClass``. - Do not instantiate directly, use ``support_foolscap_and_https(tub)`` + Do not instantiate directly, use ``create_tub_with_https_support(...)`` instead. The way this class works is that a new subclass is created for a specific ``Tub`` instance. """ # These are class attributes; they will be set by - # support_foolscap_and_https() and add_storage_server(). + # create_tub_with_https_support() and add_storage_server(). # The Twisted HTTPS protocol factory wrapping the storage server HTTP API: https_factory: TLSMemoryBIOFactory From 795ec0b2dbc6e5f9d5de23fb64d8148b47025ccc Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 3 Oct 2022 11:52:07 -0400 Subject: [PATCH 229/289] Fix flake8 issue. --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index d99831347..c8a9669cb 100644 --- a/setup.py +++ b/setup.py @@ -382,6 +382,9 @@ setup(name="tahoe-lafs", # also set in __init__.py ':sys_platform=="win32"': ["pywin32 != 226"], "test": [ "flake8", + # On Python 3.7, importlib_metadata v5 breaks flake8. + # https://github.com/python/importlib_metadata/issues/407 + "importlib_metadata<5; python_version < '3.8'", # Pin a specific pyflakes so we don't have different folks # disagreeing on what is or is not a lint issue. We can bump # this version from time to time, but we will do it From a063241609ad918fe5617a8aedb6abaa660d36a5 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 3 Oct 2022 10:18:32 -0600 Subject: [PATCH 230/289] 1.18.0 release-notes --- relnotes.txt | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/relnotes.txt b/relnotes.txt index e9b298771..dd7cc9429 100644 --- a/relnotes.txt +++ b/relnotes.txt @@ -1,6 +1,6 @@ -ANNOUNCING Tahoe, the Least-Authority File Store, v1.17.1 +ANNOUNCING Tahoe, the Least-Authority File Store, v1.18.0 -The Tahoe-LAFS team is pleased to announce version 1.17.1 of +The Tahoe-LAFS team is pleased to announce version 1.18.0 of Tahoe-LAFS, an extremely reliable decentralized storage system. Get it with "pip install tahoe-lafs", or download a tarball here: @@ -15,10 +15,12 @@ unique security and fault-tolerance properties: https://tahoe-lafs.readthedocs.org/en/latest/about.html -The previous stable release of Tahoe-LAFS was v1.17.0, released on -December 6, 2021. +The previous stable release of Tahoe-LAFS was v1.17.1, released on +January 7, 2022. -This release fixes two Python3-releated regressions and 4 minor bugs. +This release drops support for Python 2 and for Python 3.6 and earlier. +twistd.pid is no longer used (in favour of one with pid + process creation time). +A collection of minor bugs and issues were also fixed. Please see ``NEWS.rst`` [1] for a complete list of changes. @@ -132,24 +134,23 @@ Of Fame" [13]. ACKNOWLEDGEMENTS -This is the nineteenth release of Tahoe-LAFS to be created -solely as a labor of love by volunteers. Thank you very much -to the team of "hackers in the public interest" who make -Tahoe-LAFS possible. +This is the twentieth release of Tahoe-LAFS to be created solely as a +labor of love by volunteers. Thank you very much to the team of +"hackers in the public interest" who make Tahoe-LAFS possible. meejah on behalf of the Tahoe-LAFS team -January 7, 2022 +October 1, 2022 Planet Earth -[1] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.17.1/NEWS.rst +[1] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.18.0/NEWS.rst [2] https://github.com/tahoe-lafs/tahoe-lafs/blob/master/docs/known_issues.rst [3] https://tahoe-lafs.org/trac/tahoe-lafs/wiki/RelatedProjects -[4] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.17.1/COPYING.GPL -[5] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.17.1/COPYING.TGPPL.rst -[6] https://tahoe-lafs.readthedocs.org/en/tahoe-lafs-1.17.1/INSTALL.html +[4] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.18.0/COPYING.GPL +[5] https://github.com/tahoe-lafs/tahoe-lafs/blob/tahoe-lafs-1.18.0/COPYING.TGPPL.rst +[6] https://tahoe-lafs.readthedocs.org/en/tahoe-lafs-1.18.0/INSTALL.html [7] https://lists.tahoe-lafs.org/mailman/listinfo/tahoe-dev [8] https://tahoe-lafs.org/trac/tahoe-lafs/roadmap [9] https://github.com/tahoe-lafs/tahoe-lafs/blob/master/CREDITS From 0e9ab8a0e3b6fe1058a2347270a5c6d3b6dfe060 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 3 Oct 2022 10:18:58 -0600 Subject: [PATCH 231/289] missed release-notes --- newsfragments/3927.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3927.minor diff --git a/newsfragments/3927.minor b/newsfragments/3927.minor new file mode 100644 index 000000000..e69de29bb From c13be0c89b8df744ff46b1b163e4b9138451169c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 09:19:48 -0400 Subject: [PATCH 232/289] Try harder to cleanup. --- src/allmydata/test/test_storage_https.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/test/test_storage_https.py b/src/allmydata/test/test_storage_https.py index 3b41e8308..bacb40290 100644 --- a/src/allmydata/test/test_storage_https.py +++ b/src/allmydata/test/test_storage_https.py @@ -198,6 +198,10 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") + # We keep getting TLSMemoryBIOProtocol being left around, so try harder + # to wait for it to finish. + await deferLater(reactor, 0.001) + # A potential attack to test is a private key that doesn't match the # certificate... but OpenSSL (quite rightly) won't let you listen with that # so I don't know how to test that! See From 8b2884cf3a1ce0d4d17c8483202b48055646b7ed Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 09:44:30 -0400 Subject: [PATCH 233/289] Make changes work again. --- src/allmydata/node.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 6747a3c77..7d33d220a 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -698,7 +698,7 @@ def create_connection_handlers(config, i2p_provider, tor_provider): def create_tub(tub_options, default_connection_handlers, foolscap_connection_handlers, - handler_overrides={}, **kwargs): + handler_overrides={}, force_foolscap=False, **kwargs): """ Create a Tub with the right options and handlers. It will be ephemeral unless the caller provides certFile= in kwargs @@ -708,10 +708,16 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han :param dict tub_options: every key-value pair in here will be set in the new Tub via `Tub.setOption` + + :param bool force_foolscap: If True, only allow Foolscap, not just HTTPS + storage protocol. """ - # We listen simulataneously for both Foolscap and HTTPS on the same port, + # We listen simultaneously for both Foolscap and HTTPS on the same port, # so we have to create a special Foolscap Tub for that to work: - tub = create_tub_with_https_support(**kwargs) + if force_foolscap: + tub = Tub(**kwargs) + else: + tub = create_tub_with_https_support(**kwargs) for (name, value) in list(tub_options.items()): tub.setOption(name, value) @@ -907,11 +913,10 @@ def create_main_tub(config, tub_options, tub_options, default_connection_handlers, foolscap_connection_handlers, + force_foolscap=config.get_config("node", "force_foolscap", False), handler_overrides=handler_overrides, certFile=certfile, ) - if not config.get_config("node", "force_foolscap", False): - support_foolscap_and_https(tub) if portlocation is None: log.msg("Tub is not listening") From fd07c092edf9e0367a0f2c6d770273a4ba1f6a52 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 10:30:07 -0400 Subject: [PATCH 234/289] close() is called while writes are still happening. --- src/allmydata/storage_client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index f9a6feb7d..4ab818b9c 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1211,7 +1211,7 @@ class _HTTPBucketWriter(object): storage_index = attr.ib(type=bytes) share_number = attr.ib(type=int) upload_secret = attr.ib(type=bytes) - finished = attr.ib(type=bool, default=False) + finished = attr.ib(type=defer.Deferred[bool], factory=defer.Deferred) def abort(self): return self.client.abort_upload(self.storage_index, self.share_number, @@ -1223,14 +1223,13 @@ class _HTTPBucketWriter(object): self.storage_index, self.share_number, self.upload_secret, offset, data ) if result.finished: - self.finished = True + self.finished.callback(True) defer.returnValue(None) def close(self): - # A no-op in HTTP protocol. - if not self.finished: - return defer.fail(RuntimeError("You didn't finish writing?!")) - return defer.succeed(None) + # We're not _really_ closed until all writes have succeeded and we + # finished writing all the data. + return self.finished From 1294baa82e71e1d4cd8c63fc2c3f6e3041062505 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 10:30:27 -0400 Subject: [PATCH 235/289] LoopingCall may already have been stopped. --- src/allmydata/storage_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 4ab818b9c..a7d5edb11 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1066,7 +1066,8 @@ class HTTPNativeStorageServer(service.MultiService): def stopService(self): service.MultiService.stopService(self) - self._lc.stop() + if self._lc.running: + self._lc.stop() self._failed_to_connect("shut down") From ea1d2486115b848ec5a8409eae328792e5d2a338 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 10:51:43 -0400 Subject: [PATCH 236/289] These objects get stored in a context where they need to be hashed, sometimes. --- src/allmydata/storage/http_client.py | 11 +++++------ src/allmydata/storage_client.py | 5 ++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 16d426dda..1fe9a99fd 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -276,7 +276,7 @@ class _StorageClientHTTPSPolicy: ) -@define +@define(hash=True) class StorageClient(object): """ Low-level HTTP client that talks to the HTTP storage server. @@ -286,7 +286,7 @@ class StorageClient(object): # ``StorageClient.from_nurl()``. _base_url: DecodedURL _swissnum: bytes - _treq: Union[treq, StubTreq, HTTPClient] + _treq: Union[treq, StubTreq, HTTPClient] = field(eq=False) @classmethod def from_nurl( @@ -379,13 +379,12 @@ class StorageClient(object): return self._treq.request(method, url, headers=headers, **kwargs) +@define(hash=True) class StorageClientGeneral(object): """ High-level HTTP APIs that aren't immutable- or mutable-specific. """ - - def __init__(self, client): # type: (StorageClient) -> None - self._client = client + _client : StorageClient @inlineCallbacks def get_version(self): @@ -534,7 +533,7 @@ async def advise_corrupt_share( ) -@define +@define(hash=True) class StorageClientImmutables(object): """ APIs for interacting with immutables. diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index a7d5edb11..3b08f0b25 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1187,7 +1187,7 @@ class _StorageServer(object): -@attr.s +@attr.s(hash=True) class _FakeRemoteReference(object): """ Emulate a Foolscap RemoteReference, calling a local object instead. @@ -1203,7 +1203,6 @@ class _FakeRemoteReference(object): raise RemoteException(e.args) -@attr.s class _HTTPBucketWriter(object): """ Emulate a ``RIBucketWriter``, but use HTTP protocol underneath. @@ -1234,7 +1233,7 @@ class _HTTPBucketWriter(object): -@attr.s +@attr.s(hash=True) class _HTTPBucketReader(object): """ Emulate a ``RIBucketReader``, but use HTTP protocol underneath. From 8190eea48924a095bf8c681fc3a7b9960d7ed839 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 11:02:36 -0400 Subject: [PATCH 237/289] Fix bug introduced in previous commit. --- src/allmydata/storage_client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 3b08f0b25..6d59b4f7d 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1203,6 +1203,7 @@ class _FakeRemoteReference(object): raise RemoteException(e.args) +@attr.s class _HTTPBucketWriter(object): """ Emulate a ``RIBucketWriter``, but use HTTP protocol underneath. From 8b0ddf406e2863d0991f287032efbb203a15c8c4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 4 Oct 2022 11:17:19 -0400 Subject: [PATCH 238/289] Make HTTP and Foolscap match in another edge case. --- src/allmydata/storage_client.py | 15 ++++++++++++-- src/allmydata/test/test_istorageserver.py | 24 +++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 6d59b4f7d..51b1eabca 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -45,6 +45,7 @@ from zope.interface import ( Interface, implementer, ) +from twisted.python.failure import Failure from twisted.web import http from twisted.internet.task import LoopingCall from twisted.internet import defer, reactor @@ -1233,6 +1234,16 @@ class _HTTPBucketWriter(object): return self.finished +def _ignore_404(failure: Failure) -> Union[Failure, None]: + """ + Useful for advise_corrupt_share(), since it swallows unknown share numbers + in Foolscap. + """ + if failure.check(HTTPClientException) and failure.value.code == http.NOT_FOUND: + return None + else: + return failure + @attr.s(hash=True) class _HTTPBucketReader(object): @@ -1252,7 +1263,7 @@ class _HTTPBucketReader(object): return self.client.advise_corrupt_share( self.storage_index, self.share_number, str(reason, "utf-8", errors="backslashreplace") - ) + ).addErrback(_ignore_404) # WORK IN PROGRESS, for now it doesn't actually implement whole thing. @@ -1352,7 +1363,7 @@ class _HTTPStorageServer(object): raise ValueError("Unknown share type") return client.advise_corrupt_share( storage_index, shnum, str(reason, "utf-8", errors="backslashreplace") - ) + ).addErrback(_ignore_404) @defer.inlineCallbacks def slot_readv(self, storage_index, shares, readv): diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index 81025d779..a0370bdb6 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -440,6 +440,17 @@ class IStorageServerImmutableAPIsTestsMixin(object): b"immutable", storage_index, 0, b"ono" ) + @inlineCallbacks + def test_advise_corrupt_share_unknown_share_number(self): + """ + Calling ``advise_corrupt_share()`` on an immutable share, with an + unknown share number, does not result in error. + """ + storage_index, _, _ = yield self.create_share() + yield self.storage_client.advise_corrupt_share( + b"immutable", storage_index, 999, b"ono" + ) + @inlineCallbacks def test_allocate_buckets_creates_lease(self): """ @@ -909,6 +920,19 @@ class IStorageServerMutableAPIsTestsMixin(object): b"mutable", storage_index, 0, b"ono" ) + @inlineCallbacks + def test_advise_corrupt_share_unknown_share_number(self): + """ + Calling ``advise_corrupt_share()`` on a mutable share with an unknown + share number does not result in error (other behavior is opaque at this + level of abstraction). + """ + secrets, storage_index = yield self.create_slot() + + yield self.storage_client.advise_corrupt_share( + b"mutable", storage_index, 999, b"ono" + ) + @inlineCallbacks def test_STARAW_create_lease(self): """ From 0d23237b11aea61241a75e4d19c6df394b9de0b2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 13 Oct 2022 13:44:49 -0400 Subject: [PATCH 239/289] Some progress towards passing test_rref. --- src/allmydata/storage/http_client.py | 44 +++++++++++++++++++++---- src/allmydata/storage_client.py | 16 +++++---- src/allmydata/test/common_system.py | 2 ++ src/allmydata/test/test_storage_http.py | 5 +++ src/allmydata/test/test_system.py | 9 +++++ 5 files changed, 64 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 1fe9a99fd..2589d4e41 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -20,7 +20,7 @@ from twisted.web.http_headers import Headers from twisted.web import http from twisted.web.iweb import IPolicyForHTTPS from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred, succeed -from twisted.internet.interfaces import IOpenSSLClientConnectionCreator +from twisted.internet.interfaces import IOpenSSLClientConnectionCreator, IReactorTime from twisted.internet.ssl import CertificateOptions from twisted.web.client import Agent, HTTPConnectionPool from zope.interface import implementer @@ -282,15 +282,32 @@ class StorageClient(object): Low-level HTTP client that talks to the HTTP storage server. """ + # If True, we're doing unit testing. + TEST_MODE = False + + @classmethod + def start_test_mode(cls): + """Switch to testing mode. + + In testing mode we disable persistent HTTP queries and have shorter + timeouts, to make certain tests work, but don't change the actual + semantic work being done—given a fast server, everything would work the + same. + """ + cls.TEST_MODE = True + # The URL is a HTTPS URL ("https://..."). To construct from a NURL, use # ``StorageClient.from_nurl()``. _base_url: DecodedURL _swissnum: bytes _treq: Union[treq, StubTreq, HTTPClient] = field(eq=False) + _clock: IReactorTime @classmethod def from_nurl( - cls, nurl: DecodedURL, reactor, persistent: bool = True + cls, + nurl: DecodedURL, + reactor, ) -> StorageClient: """ Create a ``StorageClient`` for the given NURL. @@ -302,16 +319,23 @@ class StorageClient(object): swissnum = nurl.path[0].encode("ascii") certificate_hash = nurl.user.encode("ascii") + if cls.TEST_MODE: + pool = HTTPConnectionPool(reactor, persistent=False) + pool.retryAutomatically = False + pool.maxPersistentPerHost = 0 + else: + pool = HTTPConnectionPool(reactor) + treq_client = HTTPClient( Agent( reactor, _StorageClientHTTPSPolicy(expected_spki_hash=certificate_hash), - pool=HTTPConnectionPool(reactor, persistent=persistent), + pool=pool, ) ) https_url = DecodedURL().replace(scheme="https", host=nurl.host, port=nurl.port) - return cls(https_url, swissnum, treq_client) + return cls(https_url, swissnum, treq_client, reactor) def relative_url(self, path): """Get a URL relative to the base URL.""" @@ -376,7 +400,14 @@ class StorageClient(object): kwargs["data"] = dumps(message_to_serialize) headers.addRawHeader("Content-Type", CBOR_MIME_TYPE) - return self._treq.request(method, url, headers=headers, **kwargs) + result = self._treq.request(method, url, headers=headers, **kwargs) + + # If we're in test mode, we want an aggressive timeout, e.g. for + # test_rref in test_system.py. + if self.TEST_MODE: + result.addTimeout(1, self._clock) + + return result @define(hash=True) @@ -384,7 +415,8 @@ class StorageClientGeneral(object): """ High-level HTTP APIs that aren't immutable- or mutable-specific. """ - _client : StorageClient + + _client: StorageClient @inlineCallbacks def get_version(self): diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 51b1eabca..d492ee4cf 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -951,14 +951,18 @@ class HTTPNativeStorageServer(service.MultiService): self.announcement = announcement self._on_status_changed = ObserverList() furl = announcement["anonymous-storage-FURL"].encode("utf-8") - self._nickname, self._permutation_seed, self._tubid, self._short_description, self._long_description = _parse_announcement(server_id, furl, announcement) + ( + self._nickname, + self._permutation_seed, + self._tubid, + self._short_description, + self._long_description + ) = _parse_announcement(server_id, furl, announcement) + # TODO need some way to do equivalent of Happy Eyeballs for multiple NURLs? + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3935 nurl = DecodedURL.from_text(announcement["anonymous-storage-NURLs"][0]) - # Tests don't want persistent HTTPS pool, since that leaves a dirty - # reactor. As a reasonable hack, disabling persistent connnections for - # localhost allows us to have passing tests while not reducing - # performance for real-world usage. self._istorage_server = _HTTPStorageServer.from_http_client( - StorageClient.from_nurl(nurl, reactor, nurl.host not in ("localhost", "127.0.0.1")) + StorageClient.from_nurl(nurl, reactor) ) self._connection_status = connection_status.ConnectionStatus.unstarted() diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 75379bbf3..ef4b65529 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -28,6 +28,7 @@ from foolscap.api import flushEventualQueue from allmydata import client from allmydata.introducer.server import create_introducer from allmydata.util import fileutil, log, pollmixin +from allmydata.storage import http_client from twisted.python.filepath import ( FilePath, @@ -645,6 +646,7 @@ def _render_section_values(values): class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def setUp(self): + http_client.StorageClient.start_test_mode() self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 4a912cf6c..819c94f83 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -291,6 +291,7 @@ class CustomHTTPServerTests(SyncTestCase): def setUp(self): super(CustomHTTPServerTests, self).setUp() + StorageClient.start_test_mode() # Could be a fixture, but will only be used in this test class so not # going to bother: self._http_server = TestApp() @@ -298,6 +299,7 @@ class CustomHTTPServerTests(SyncTestCase): DecodedURL.from_text("http://127.0.0.1"), SWISSNUM_FOR_TEST, treq=StubTreq(self._http_server._app.resource()), + clock=Clock() ) def test_authorization_enforcement(self): @@ -375,6 +377,7 @@ class HttpTestFixture(Fixture): """ def _setUp(self): + StorageClient.start_test_mode() self.clock = Clock() self.tempdir = self.useFixture(TempDir()) # The global Cooperator used by Twisted (a) used by pull producers in @@ -396,6 +399,7 @@ class HttpTestFixture(Fixture): DecodedURL.from_text("http://127.0.0.1"), SWISSNUM_FOR_TEST, treq=self.treq, + clock=self.clock, ) def result_of_with_flush(self, d): @@ -480,6 +484,7 @@ class GenericHTTPAPITests(SyncTestCase): DecodedURL.from_text("http://127.0.0.1"), b"something wrong", treq=StubTreq(self.http.http_server.get_resource()), + clock=self.http.clock, ) ) with assert_fails_with_http_code(self, http.UNAUTHORIZED): diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index d859a0e00..d94b4d163 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -1796,6 +1796,15 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): class Connections(SystemTestMixin, unittest.TestCase): def test_rref(self): + # The way the listening port is created is via + # SameProcessStreamEndpointAssigner (allmydata.test.common), which then + # makes an endpoint string parsed by AdoptedServerPort. The latter does + # dup(fd), which results in the filedescriptor staying alive _until the + # test ends_. That means that when we disown the service, we still have + # the listening port there on the OS level! Just the resulting + # connections aren't handled. So this test relies on aggressive + # timeouts in the HTTP client and presumably some equivalent in + # Foolscap, since connection refused does _not_ happen. self.basedir = "system/Connections/rref" d = self.set_up_nodes(2) def _start(ign): From b80a215ae1dc80a3760049bec864fe227eee1654 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 13 Oct 2022 13:56:28 -0400 Subject: [PATCH 240/289] test_rref passes now. --- src/allmydata/storage_client.py | 8 ++++---- src/allmydata/test/common_system.py | 2 ++ src/allmydata/test/test_system.py | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index d492ee4cf..6f2106f87 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -1052,10 +1052,9 @@ class HTTPNativeStorageServer(service.MultiService): """ See ``IServer.get_storage_server``. """ - if self.is_connected(): - return self._istorage_server - else: + if self._connection_status.summary == "unstarted": return None + return self._istorage_server def stop_connecting(self): self._lc.stop() @@ -1070,10 +1069,11 @@ class HTTPNativeStorageServer(service.MultiService): ) def stopService(self): - service.MultiService.stopService(self) + result = service.MultiService.stopService(self) if self._lc.running: self._lc.stop() self._failed_to_connect("shut down") + return result class UnknownServerTypeError(Exception): diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index ef4b65529..ee345a0c0 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -21,6 +21,7 @@ from functools import partial from twisted.internet import reactor from twisted.internet import defer from twisted.internet.defer import inlineCallbacks +from twisted.internet.task import deferLater from twisted.application import service from foolscap.api import flushEventualQueue @@ -658,6 +659,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): log.msg("shutting down SystemTest services") d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) + d.addBoth(lambda x: deferLater(reactor, 0.01, lambda: x)) return d def getdir(self, subdir): diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index d94b4d163..c6d2c6bb7 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -1821,9 +1821,10 @@ class Connections(SystemTestMixin, unittest.TestCase): # now shut down the server d.addCallback(lambda ign: self.clients[1].disownServiceParent()) + # and wait for the client to notice def _poll(): - return len(self.c0.storage_broker.get_connected_servers()) < 2 + return len(self.c0.storage_broker.get_connected_servers()) == 1 d.addCallback(lambda ign: self.poll(_poll)) def _down(ign): From 0f31e3cd4b054b17076ffeaa73cc412bc63191b3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 13 Oct 2022 14:41:59 -0400 Subject: [PATCH 241/289] Leave HTTP off by default for now. --- src/allmydata/node.py | 8 ++++++-- src/allmydata/test/common_system.py | 5 ++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 7d33d220a..f572cf7d9 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -908,12 +908,16 @@ def create_main_tub(config, tub_options, # FIXME? "node.pem" was the CERTFILE option/thing certfile = config.get_private_path("node.pem") - tub = create_tub( tub_options, default_connection_handlers, foolscap_connection_handlers, - force_foolscap=config.get_config("node", "force_foolscap", False), + # TODO eventually we will want the default to be False, but for now we + # don't want to enable HTTP by default. + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3934 + force_foolscap=config.get_config( + "node", "force_foolscap", default=True, boolean=True + ), handler_overrides=handler_overrides, certFile=certfile, ) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index ee345a0c0..edeea5689 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -794,13 +794,13 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): if which in feature_matrix.get((section, feature), {which}): config.setdefault(section, {})[feature] = value - if force_foolscap: - config.setdefault("node", {})["force_foolscap"] = force_foolscap + #config.setdefault("node", {})["force_foolscap"] = force_foolscap setnode = partial(setconf, config, which, "node") sethelper = partial(setconf, config, which, "helper") setnode("nickname", u"client %d \N{BLACK SMILING FACE}" % (which,)) + setnode("force_foolscap", str(force_foolscap)) tub_location_hint, tub_port_endpoint = self.port_assigner.assign(reactor) setnode("tub.port", tub_port_endpoint) @@ -818,7 +818,6 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): " furl: %s\n") % self.introducer_furl iyaml_fn = os.path.join(basedir, "private", "introducers.yaml") fileutil.write(iyaml_fn, iyaml) - return _render_config(config) def _set_up_client_node(self, which, force_foolscap): From 42d38433436a0f7650704fd45383688f4eeb9ac1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 14 Oct 2022 09:16:59 -0400 Subject: [PATCH 242/289] Run test_system with both Foolscap and HTTP storage protocols, plus some resulting cleanups. --- src/allmydata/test/common_system.py | 37 +++++++------ src/allmydata/test/test_istorageserver.py | 65 +++++++++-------------- src/allmydata/test/test_system.py | 17 +++++- 3 files changed, 63 insertions(+), 56 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index edeea5689..96ab4e093 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -5,16 +5,7 @@ in ``allmydata.test.test_system``. Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - # Don't import bytes since it causes issues on (so far unported) modules on Python 2. - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, max, min, str # noqa: F401 - +from typing import Optional import os from functools import partial @@ -30,6 +21,10 @@ from allmydata import client from allmydata.introducer.server import create_introducer from allmydata.util import fileutil, log, pollmixin from allmydata.storage import http_client +from allmydata.storage_client import ( + NativeStorageServer, + HTTPNativeStorageServer, +) from twisted.python.filepath import ( FilePath, @@ -646,6 +641,11 @@ def _render_section_values(values): class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): + # If set to True, use Foolscap for storage protocol. If set to False, HTTP + # will be used when possible. If set to None, this suggests a bug in the + # test code. + FORCE_FOOLSCAP_FOR_STORAGE : Optional[bool] = None + def setUp(self): http_client.StorageClient.start_test_mode() self.port_assigner = SameProcessStreamEndpointAssigner() @@ -702,7 +702,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): return f.read().strip() @inlineCallbacks - def set_up_nodes(self, NUMCLIENTS=5, force_foolscap=False): + def set_up_nodes(self, NUMCLIENTS=5): """ Create an introducer and ``NUMCLIENTS`` client nodes pointed at it. All of the nodes are running in this process. @@ -715,18 +715,25 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): :param int NUMCLIENTS: The number of client nodes to create. - :param bool force_foolscap: Force clients to use Foolscap instead of e.g. - HTTPS when available. - :return: A ``Deferred`` that fires when the nodes have connected to each other. """ + self.assertIn( + self.FORCE_FOOLSCAP_FOR_STORAGE, (True, False), + "You forgot to set FORCE_FOOLSCAP_FOR_STORAGE on {}".format(self.__class__) + ) self.numclients = NUMCLIENTS self.introducer = yield self._create_introducer() self.add_service(self.introducer) self.introweb_url = self._get_introducer_web() - yield self._set_up_client_nodes(force_foolscap) + yield self._set_up_client_nodes(self.FORCE_FOOLSCAP_FOR_STORAGE) + native_server = next(iter(self.clients[0].storage_broker.get_known_servers())) + if self.FORCE_FOOLSCAP_FOR_STORAGE: + expected_storage_server_class = NativeStorageServer + else: + expected_storage_server_class = HTTPNativeStorageServer + self.assertIsInstance(native_server, expected_storage_server_class) @inlineCallbacks def _set_up_client_nodes(self, force_foolscap): diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index a0370bdb6..a488622c7 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -1046,13 +1046,12 @@ class _SharedMixin(SystemTestMixin): """Base class for Foolscap and HTTP mixins.""" SKIP_TESTS = set() # type: Set[str] - FORCE_FOOLSCAP = False - - def _get_native_server(self): - return next(iter(self.clients[0].storage_broker.get_known_servers())) def _get_istorage_server(self): - raise NotImplementedError("implement in subclass") + native_server = next(iter(self.clients[0].storage_broker.get_known_servers())) + client = native_server.get_storage_server() + self.assertTrue(IStorageServer.providedBy(client)) + return client @inlineCallbacks def setUp(self): @@ -1065,7 +1064,7 @@ class _SharedMixin(SystemTestMixin): self.basedir = "test_istorageserver/" + self.id() yield SystemTestMixin.setUp(self) - yield self.set_up_nodes(1, self.FORCE_FOOLSCAP) + yield self.set_up_nodes(1) self.server = None for s in self.clients[0].services: if isinstance(s, StorageServer): @@ -1075,7 +1074,7 @@ class _SharedMixin(SystemTestMixin): self._clock = Clock() self._clock.advance(123456) self.server._clock = self._clock - self.storage_client = yield self._get_istorage_server() + self.storage_client = self._get_istorage_server() def fake_time(self): """Return the current fake, test-controlled, time.""" @@ -1091,49 +1090,29 @@ class _SharedMixin(SystemTestMixin): yield SystemTestMixin.tearDown(self) -class _FoolscapMixin(_SharedMixin): - """Run tests on Foolscap version of ``IStorageServer``.""" - - FORCE_FOOLSCAP = True - - def _get_istorage_server(self): - native_server = self._get_native_server() - assert isinstance(native_server, NativeStorageServer) - client = native_server.get_storage_server() - self.assertTrue(IStorageServer.providedBy(client)) - return succeed(client) - - -class _HTTPMixin(_SharedMixin): - """Run tests on the HTTP version of ``IStorageServer``.""" - - FORCE_FOOLSCAP = False - - def _get_istorage_server(self): - native_server = self._get_native_server() - assert isinstance(native_server, HTTPNativeStorageServer) - client = native_server.get_storage_server() - self.assertTrue(IStorageServer.providedBy(client)) - return succeed(client) - - class FoolscapSharedAPIsTests( - _FoolscapMixin, IStorageServerSharedAPIsTestsMixin, AsyncTestCase + _SharedMixin, IStorageServerSharedAPIsTestsMixin, AsyncTestCase ): """Foolscap-specific tests for shared ``IStorageServer`` APIs.""" + FORCE_FOOLSCAP_FOR_STORAGE = True + class HTTPSharedAPIsTests( - _HTTPMixin, IStorageServerSharedAPIsTestsMixin, AsyncTestCase + _SharedMixin, IStorageServerSharedAPIsTestsMixin, AsyncTestCase ): """HTTP-specific tests for shared ``IStorageServer`` APIs.""" + FORCE_FOOLSCAP_FOR_STORAGE = False + class FoolscapImmutableAPIsTests( - _FoolscapMixin, IStorageServerImmutableAPIsTestsMixin, AsyncTestCase + _SharedMixin, IStorageServerImmutableAPIsTestsMixin, AsyncTestCase ): """Foolscap-specific tests for immutable ``IStorageServer`` APIs.""" + FORCE_FOOLSCAP_FOR_STORAGE = True + def test_disconnection(self): """ If we disconnect in the middle of writing to a bucket, all data is @@ -1156,23 +1135,29 @@ class FoolscapImmutableAPIsTests( """ current = self.storage_client yield self.bounce_client(0) - self.storage_client = self._get_native_server().get_storage_server() + self.storage_client = self._get_istorage_server() assert self.storage_client is not current class HTTPImmutableAPIsTests( - _HTTPMixin, IStorageServerImmutableAPIsTestsMixin, AsyncTestCase + _SharedMixin, IStorageServerImmutableAPIsTestsMixin, AsyncTestCase ): """HTTP-specific tests for immutable ``IStorageServer`` APIs.""" + FORCE_FOOLSCAP_FOR_STORAGE = False + class FoolscapMutableAPIsTests( - _FoolscapMixin, IStorageServerMutableAPIsTestsMixin, AsyncTestCase + _SharedMixin, IStorageServerMutableAPIsTestsMixin, AsyncTestCase ): """Foolscap-specific tests for mutable ``IStorageServer`` APIs.""" + FORCE_FOOLSCAP_FOR_STORAGE = True + class HTTPMutableAPIsTests( - _HTTPMixin, IStorageServerMutableAPIsTestsMixin, AsyncTestCase + _SharedMixin, IStorageServerMutableAPIsTestsMixin, AsyncTestCase ): """HTTP-specific tests for mutable ``IStorageServer`` APIs.""" + + FORCE_FOOLSCAP_FOR_STORAGE = False diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index c6d2c6bb7..a83ff9488 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -117,7 +117,8 @@ class CountingDataUploadable(upload.Data): class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): - + """Foolscap integration-y tests.""" + FORCE_FOOLSCAP_FOR_STORAGE = True timeout = 180 def test_connections(self): @@ -1794,6 +1795,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): class Connections(SystemTestMixin, unittest.TestCase): + FORCE_FOOLSCAP_FOR_STORAGE = True def test_rref(self): # The way the listening port is created is via @@ -1834,3 +1836,16 @@ class Connections(SystemTestMixin, unittest.TestCase): self.assertEqual(storage_server, self.s1_storage_server) d.addCallback(_down) return d + + +class HTTPSystemTest(SystemTest): + """HTTP storage protocol variant of the system tests.""" + + FORCE_FOOLSCAP_FOR_STORAGE = False + + + +class HTTPConnections(Connections): + """HTTP storage protocol variant of the connections tests.""" + FORCE_FOOLSCAP_FOR_STORAGE = False + From e409262e86ff3639187bfa89f438b6e9db071228 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 14 Oct 2022 09:50:07 -0400 Subject: [PATCH 243/289] Fix some flakes. --- src/allmydata/test/test_istorageserver.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index a488622c7..9e7e7b6e1 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -15,7 +15,7 @@ from typing import Set from random import Random from unittest import SkipTest -from twisted.internet.defer import inlineCallbacks, returnValue, succeed +from twisted.internet.defer import inlineCallbacks, returnValue from twisted.internet.task import Clock from foolscap.api import Referenceable, RemoteException @@ -25,10 +25,6 @@ from allmydata.interfaces import IStorageServer from .common_system import SystemTestMixin from .common import AsyncTestCase from allmydata.storage.server import StorageServer # not a IStorageServer!! -from allmydata.storage_client import ( - NativeStorageServer, - HTTPNativeStorageServer, -) # Use random generator with known seed, so results are reproducible if tests From 0febc8745653992cbb53d98702c92edc24b7a516 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 14 Oct 2022 10:03:06 -0400 Subject: [PATCH 244/289] Don't include reactor in comparison. --- src/allmydata/storage/http_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 2589d4e41..40979d3cb 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -301,7 +301,7 @@ class StorageClient(object): _base_url: DecodedURL _swissnum: bytes _treq: Union[treq, StubTreq, HTTPClient] = field(eq=False) - _clock: IReactorTime + _clock: IReactorTime = field(eq=False) @classmethod def from_nurl( From f68c3978f616c5efecc15094aa83c363bd6db58d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 14 Oct 2022 10:18:38 -0400 Subject: [PATCH 245/289] News fragment. --- newsfragments/3783.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3783.minor diff --git a/newsfragments/3783.minor b/newsfragments/3783.minor new file mode 100644 index 000000000..e69de29bb From 1a3e3a86c317c22a79790cc134102f6dc5b368ff Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 14 Oct 2022 11:27:04 -0400 Subject: [PATCH 246/289] Require latest pycddl, and work around a regression. --- newsfragments/3938.bugfix | 1 + setup.py | 2 +- src/allmydata/storage/http_client.py | 12 ++++++------ src/allmydata/storage/http_server.py | 12 +++++------- 4 files changed, 13 insertions(+), 14 deletions(-) create mode 100644 newsfragments/3938.bugfix diff --git a/newsfragments/3938.bugfix b/newsfragments/3938.bugfix new file mode 100644 index 000000000..c2778cfdf --- /dev/null +++ b/newsfragments/3938.bugfix @@ -0,0 +1 @@ +Work with (and require) newer versions of pycddl. \ No newline at end of file diff --git a/setup.py b/setup.py index 72478767c..768e44e29 100644 --- a/setup.py +++ b/setup.py @@ -137,7 +137,7 @@ install_requires = [ "werkzeug != 2.2.0", "treq", "cbor2", - "pycddl", + "pycddl >= 0.2", # for pid-file support "psutil", diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 16d426dda..420d3610f 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -83,35 +83,35 @@ _SCHEMAS = { "allocate_buckets": Schema( """ response = { - already-have: #6.258([* uint]) - allocated: #6.258([* uint]) + already-have: #6.258([0*256 uint]) + allocated: #6.258([0*256 uint]) } """ ), "immutable_write_share_chunk": Schema( """ response = { - required: [* {begin: uint, end: uint}] + required: [0* {begin: uint, end: uint}] } """ ), "list_shares": Schema( """ - response = #6.258([* uint]) + response = #6.258([0*256 uint]) """ ), "mutable_read_test_write": Schema( """ response = { "success": bool, - "data": {* share_number: [* bstr]} + "data": {0*256 share_number: [0* bstr]} } share_number = uint """ ), "mutable_list_shares": Schema( """ - response = #6.258([* uint]) + response = #6.258([0*256 uint]) """ ), } diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index eefb9b906..3902976ba 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -260,7 +260,7 @@ _SCHEMAS = { "allocate_buckets": Schema( """ request = { - share-numbers: #6.258([*256 uint]) + share-numbers: #6.258([0*256 uint]) allocated-size: uint } """ @@ -276,15 +276,13 @@ _SCHEMAS = { """ request = { "test-write-vectors": { - ; TODO Add length limit here, after - ; https://github.com/anweiss/cddl/issues/128 is fixed - * share_number => { - "test": [*30 {"offset": uint, "size": uint, "specimen": bstr}] - "write": [*30 {"offset": uint, "data": bstr}] + 0*256 share_number : { + "test": [0*30 {"offset": uint, "size": uint, "specimen": bstr}] + "write": [0*30 {"offset": uint, "data": bstr}] "new-length": uint / null } } - "read-vector": [*30 {"offset": uint, "size": uint}] + "read-vector": [0*30 {"offset": uint, "size": uint}] } share_number = uint """ From 46fbe3d0283695dc503fabb0b9f8c4ed9401cdcf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 18 Oct 2022 17:32:23 -0400 Subject: [PATCH 247/289] bump pypi-deps-db for new pycddl version --- nix/sources.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/sources.json b/nix/sources.json index 79eabe7a1..950151416 100644 --- a/nix/sources.json +++ b/nix/sources.json @@ -53,10 +53,10 @@ "homepage": "", "owner": "DavHau", "repo": "pypi-deps-db", - "rev": "76b8f1e44a8ec051b853494bcf3cc8453a294a6a", - "sha256": "18fgqyh4z578jjhk26n1xi2cw2l98vrqp962rgz9a6wa5yh1nm4x", + "rev": "5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6", + "sha256": "0pc6mj7rzvmhh303rvj5wf4hrksm4h2rf4fsvqs0ljjdmgxrqm3f", "type": "tarball", - "url": "https://github.com/DavHau/pypi-deps-db/archive/76b8f1e44a8ec051b853494bcf3cc8453a294a6a.tar.gz", + "url": "https://github.com/DavHau/pypi-deps-db/archive/5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6.tar.gz", "url_template": "https://github.com///archive/.tar.gz" } } From 48ae729c0de57818d132763aa62e99faffd46556 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 2 Nov 2022 10:18:23 -0400 Subject: [PATCH 248/289] Don't reuse basedir across tests. --- src/allmydata/test/test_system.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index a83ff9488..f03d795ba 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -121,8 +121,13 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): FORCE_FOOLSCAP_FOR_STORAGE = True timeout = 180 + @property + def basedir(self): + return "system/SystemTest/{}-foolscap-{}".format( + self.id().split(".")[-1], self.FORCE_FOOLSCAP_FOR_STORAGE + ) + def test_connections(self): - self.basedir = "system/SystemTest/test_connections" d = self.set_up_nodes() self.extra_node = None d.addCallback(lambda res: self.add_extra_node(self.numclients)) @@ -150,11 +155,9 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): del test_connections def test_upload_and_download_random_key(self): - self.basedir = "system/SystemTest/test_upload_and_download_random_key" return self._test_upload_and_download(convergence=None) def test_upload_and_download_convergent(self): - self.basedir = "system/SystemTest/test_upload_and_download_convergent" return self._test_upload_and_download(convergence=b"some convergence string") def _test_upload_and_download(self, convergence): @@ -517,7 +520,6 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): def test_mutable(self): - self.basedir = "system/SystemTest/test_mutable" DATA = b"initial contents go here." # 25 bytes % 3 != 0 DATA_uploadable = MutableData(DATA) NEWDATA = b"new contents yay" @@ -747,7 +749,6 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): # plaintext_hash check. def test_filesystem(self): - self.basedir = "system/SystemTest/test_filesystem" self.data = LARGE_DATA d = self.set_up_nodes() def _new_happy_semantics(ign): @@ -1714,7 +1715,6 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): def test_filesystem_with_cli_in_subprocess(self): # We do this in a separate test so that test_filesystem doesn't skip if we can't run bin/tahoe. - self.basedir = "system/SystemTest/test_filesystem_with_cli_in_subprocess" d = self.set_up_nodes() def _new_happy_semantics(ign): for c in self.clients: @@ -1807,7 +1807,9 @@ class Connections(SystemTestMixin, unittest.TestCase): # connections aren't handled. So this test relies on aggressive # timeouts in the HTTP client and presumably some equivalent in # Foolscap, since connection refused does _not_ happen. - self.basedir = "system/Connections/rref" + self.basedir = "system/Connections/rref-foolscap-{}".format( + self.FORCE_FOOLSCAP_FOR_STORAGE + ) d = self.set_up_nodes(2) def _start(ign): self.c0 = self.clients[0] From e05136c2385d222bd50413054dc8ac2a9d60d243 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 2 Nov 2022 13:13:21 -0400 Subject: [PATCH 249/289] Less aggressive timeout, to try to make tests pass on CI. --- src/allmydata/storage/http_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index adc3e1525..e520088c3 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -405,7 +405,7 @@ class StorageClient(object): # If we're in test mode, we want an aggressive timeout, e.g. for # test_rref in test_system.py. if self.TEST_MODE: - result.addTimeout(1, self._clock) + result.addTimeout(5, self._clock) return result From db59eb12c092264f357c59afc3586dcb8259d0f8 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 2 Nov 2022 15:22:36 -0400 Subject: [PATCH 250/289] Increase timeout. --- .circleci/run-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/run-tests.sh b/.circleci/run-tests.sh index 764651c40..854013c32 100755 --- a/.circleci/run-tests.sh +++ b/.circleci/run-tests.sh @@ -52,7 +52,7 @@ fi # This is primarily aimed at catching hangs on the PyPy job which runs for # about 21 minutes and then gets killed by CircleCI in a way that fails the # job and bypasses our "allowed failure" logic. -TIMEOUT="timeout --kill-after 1m 15m" +TIMEOUT="timeout --kill-after 1m 25m" # Run the test suite as a non-root user. This is the expected usage some # small areas of the test suite assume non-root privileges (such as unreadable From 262d9d85b97cb064da47c82bab22e62b48db6cd4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 3 Nov 2022 14:14:21 -0400 Subject: [PATCH 251/289] Switch to using persistent connections in tests too. --- src/allmydata/storage/http_client.py | 34 +++++++++++++++------------- src/allmydata/test/common_system.py | 10 +++++++- src/allmydata/test/test_system.py | 3 +++ 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index e520088c3..96820d4a5 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -282,19 +282,20 @@ class StorageClient(object): Low-level HTTP client that talks to the HTTP storage server. """ - # If True, we're doing unit testing. - TEST_MODE = False + # If set, we're doing unit testing and we should call this with + # HTTPConnectionPool we create. + TEST_MODE_REGISTER_HTTP_POOL = None @classmethod - def start_test_mode(cls): + def start_test_mode(cls, callback): """Switch to testing mode. - In testing mode we disable persistent HTTP queries and have shorter - timeouts, to make certain tests work, but don't change the actual - semantic work being done—given a fast server, everything would work the - same. + In testing mode we register the pool with test system using the given + callback so it can Do Things, most notably killing off idle HTTP + connections at test shutdown and, in some tests, in the midddle of the + test. """ - cls.TEST_MODE = True + cls.TEST_MODE_REGISTER_HTTP_POOL = callback # The URL is a HTTPS URL ("https://..."). To construct from a NURL, use # ``StorageClient.from_nurl()``. @@ -318,13 +319,10 @@ class StorageClient(object): assert nurl.scheme == "pb" swissnum = nurl.path[0].encode("ascii") certificate_hash = nurl.user.encode("ascii") + pool = HTTPConnectionPool(reactor) - if cls.TEST_MODE: - pool = HTTPConnectionPool(reactor, persistent=False) - pool.retryAutomatically = False - pool.maxPersistentPerHost = 0 - else: - pool = HTTPConnectionPool(reactor) + if cls.TEST_MODE_REGISTER_HTTP_POOL is not None: + cls.TEST_MODE_REGISTER_HTTP_POOL(pool) treq_client = HTTPClient( Agent( @@ -403,8 +401,12 @@ class StorageClient(object): result = self._treq.request(method, url, headers=headers, **kwargs) # If we're in test mode, we want an aggressive timeout, e.g. for - # test_rref in test_system.py. - if self.TEST_MODE: + # test_rref in test_system.py. Unfortunately, test_rref results in the + # socket still listening(!), only without an HTTP server, due to limits + # in the relevant socket-binding test setup code. As a result, we don't + # get connection refused, the client will successfully connect. So we + # want a timeout so we notice that. + if self.TEST_MODE_REGISTER_HTTP_POOL is not None: result.addTimeout(5, self._clock) return result diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 96ab4e093..f47aad3b6 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -647,7 +647,8 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): FORCE_FOOLSCAP_FOR_STORAGE : Optional[bool] = None def setUp(self): - http_client.StorageClient.start_test_mode() + self._http_client_pools = [] + http_client.StorageClient.start_test_mode(self._http_client_pools.append) self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) @@ -655,10 +656,17 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): self.sparent = service.MultiService() self.sparent.startService() + def close_idle_http_connections(self): + """Close all HTTP client connections that are just hanging around.""" + return defer.gatherResults( + [pool.closeCachedConnections() for pool in self._http_client_pools] + ) + def tearDown(self): log.msg("shutting down SystemTest services") d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) + d.addBoth(lambda x: self.close_idle_http_connections().addCallback(lambda _: x)) d.addBoth(lambda x: deferLater(reactor, 0.01, lambda: x)) return d diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index f03d795ba..670ac5868 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -1826,6 +1826,9 @@ class Connections(SystemTestMixin, unittest.TestCase): # now shut down the server d.addCallback(lambda ign: self.clients[1].disownServiceParent()) + # kill any persistent http connections that might continue to work + d.addCallback(lambda ign: self.close_idle_http_connections()) + # and wait for the client to notice def _poll(): return len(self.c0.storage_broker.get_connected_servers()) == 1 From 8bebb09edd2026a77dd6f8081a1fe7c0069071b3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 3 Nov 2022 14:38:59 -0400 Subject: [PATCH 252/289] Less test-specific way to make test_rref pass. --- src/allmydata/storage/http_client.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 96820d4a5..7fcf8114c 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -398,18 +398,7 @@ class StorageClient(object): kwargs["data"] = dumps(message_to_serialize) headers.addRawHeader("Content-Type", CBOR_MIME_TYPE) - result = self._treq.request(method, url, headers=headers, **kwargs) - - # If we're in test mode, we want an aggressive timeout, e.g. for - # test_rref in test_system.py. Unfortunately, test_rref results in the - # socket still listening(!), only without an HTTP server, due to limits - # in the relevant socket-binding test setup code. As a result, we don't - # get connection refused, the client will successfully connect. So we - # want a timeout so we notice that. - if self.TEST_MODE_REGISTER_HTTP_POOL is not None: - result.addTimeout(5, self._clock) - - return result + return self._treq.request(method, url, headers=headers, **kwargs) @define(hash=True) @@ -426,7 +415,12 @@ class StorageClientGeneral(object): Return the version metadata for the server. """ url = self._client.relative_url("/storage/v1/version") - response = yield self._client.request("GET", url) + result = self._client.request("GET", url) + # 1. Getting the version should never take particularly long. + # 2. Clients rely on the version command for liveness checks of servers. + result.addTimeout(5, self._client._clock) + + response = yield result decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"]) returnValue(decoded_response) From 1e50e96e2456910598862e64f7585a6dd47d59f2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 3 Nov 2022 15:04:41 -0400 Subject: [PATCH 253/289] Update to new test API. --- src/allmydata/test/test_storage_http.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 819c94f83..25c21e03f 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -291,7 +291,9 @@ class CustomHTTPServerTests(SyncTestCase): def setUp(self): super(CustomHTTPServerTests, self).setUp() - StorageClient.start_test_mode() + StorageClient.start_test_mode( + lambda pool: self.addCleanup(pool.closeCachedConnections) + ) # Could be a fixture, but will only be used in this test class so not # going to bother: self._http_server = TestApp() @@ -299,7 +301,7 @@ class CustomHTTPServerTests(SyncTestCase): DecodedURL.from_text("http://127.0.0.1"), SWISSNUM_FOR_TEST, treq=StubTreq(self._http_server._app.resource()), - clock=Clock() + clock=Clock(), ) def test_authorization_enforcement(self): @@ -377,7 +379,9 @@ class HttpTestFixture(Fixture): """ def _setUp(self): - StorageClient.start_test_mode() + StorageClient.start_test_mode( + lambda pool: self.addCleanup(pool.closeCachedConnections) + ) self.clock = Clock() self.tempdir = self.useFixture(TempDir()) # The global Cooperator used by Twisted (a) used by pull producers in @@ -1446,7 +1450,9 @@ class SharedImmutableMutableTestsMixin: self.http.client.request( "GET", self.http.client.relative_url( - "/storage/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + "/storage/v1/{}/{}/1".format( + self.KIND, _encode_si(storage_index) + ) ), headers=headers, ) From 414b4635569145ed277bfe0e0e540d62430e0bb8 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 7 Nov 2022 09:23:04 -0500 Subject: [PATCH 254/289] Use built-in treq timeout feature. --- src/allmydata/storage/http_client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 7fcf8114c..d6121aba2 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -415,12 +415,10 @@ class StorageClientGeneral(object): Return the version metadata for the server. """ url = self._client.relative_url("/storage/v1/version") - result = self._client.request("GET", url) # 1. Getting the version should never take particularly long. # 2. Clients rely on the version command for liveness checks of servers. - result.addTimeout(5, self._client._clock) - - response = yield result + # Thus, a short timeout. + response = yield self._client.request("GET", url, timeout=5) decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"]) returnValue(decoded_response) From c4772482ef19d5e1aeed99f01e38fab52a14786d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 7 Nov 2022 11:19:00 -0500 Subject: [PATCH 255/289] WIP --- src/allmydata/storage/http_client.py | 33 +++++++++++++--- src/allmydata/test/test_storage_http.py | 51 ++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 420d3610f..0bf68fdd3 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -20,8 +20,13 @@ from twisted.web.http_headers import Headers from twisted.web import http from twisted.web.iweb import IPolicyForHTTPS from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred, succeed -from twisted.internet.interfaces import IOpenSSLClientConnectionCreator +from twisted.internet.interfaces import ( + IOpenSSLClientConnectionCreator, + IReactorTime, + IDelayedCall, +) from twisted.internet.ssl import CertificateOptions +from twisted.internet import reactor from twisted.web.client import Agent, HTTPConnectionPool from zope.interface import implementer from hyperlink import DecodedURL @@ -124,16 +129,20 @@ class _LengthLimitedCollector: """ remaining_length: int + timeout_on_silence: IDelayedCall f: BytesIO = field(factory=BytesIO) def __call__(self, data: bytes): + self.timeout_on_silence.reset(60) self.remaining_length -= len(data) if self.remaining_length < 0: raise ValueError("Response length was too long") self.f.write(data) -def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred[BinaryIO]: +def limited_content( + response, max_length: int = 30 * 1024 * 1024, clock: IReactorTime = reactor +) -> Deferred[BinaryIO]: """ Like ``treq.content()``, but limit data read from the response to a set length. If the response is longer than the max allowed length, the result @@ -142,11 +151,16 @@ def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred[Bi A potentially useful future improvement would be using a temporary file to store the content; since filesystem buffering means that would use memory for small responses and disk for large responses. + + This will time out if no data is received for 60 seconds; so long as a + trickle of data continues to arrive, it will continue to run. """ - collector = _LengthLimitedCollector(max_length) + d = succeed(None) + timeout = clock.callLater(60, d.cancel) + collector = _LengthLimitedCollector(max_length, timeout) + # Make really sure everything gets called in Deferred context, treq might # call collector directly... - d = succeed(None) d.addCallback(lambda _: treq.collect(response, collector)) def done(_): @@ -307,6 +321,8 @@ class StorageClient(object): reactor, _StorageClientHTTPSPolicy(expected_spki_hash=certificate_hash), pool=HTTPConnectionPool(reactor, persistent=persistent), + # TCP-level connection timeout + connectTimeout=5, ) ) @@ -337,6 +353,7 @@ class StorageClient(object): write_enabler_secret=None, headers=None, message_to_serialize=None, + timeout: Union[int, float] = 60, **kwargs, ): """ @@ -376,7 +393,9 @@ class StorageClient(object): kwargs["data"] = dumps(message_to_serialize) headers.addRawHeader("Content-Type", CBOR_MIME_TYPE) - return self._treq.request(method, url, headers=headers, **kwargs) + return self._treq.request( + method, url, headers=headers, timeout=timeout, **kwargs + ) class StorageClientGeneral(object): @@ -461,6 +480,9 @@ def read_share_chunk( share_type, _encode_si(storage_index), share_number ) ) + # The default timeout is for getting the response, so it doesn't include + # the time it takes to download the body... so we will will deal with that + # later. response = yield client.request( "GET", url, @@ -469,6 +491,7 @@ def read_share_chunk( # but Range constructor does that the conversion for us. {"range": [Range("bytes", [(offset, offset + length)]).to_header()]} ), + unbuffered=True, # Don't buffer the response in memory. ) if response.code == http.NO_CONTENT: diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 4a912cf6c..54a26da09 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -31,6 +31,8 @@ from klein import Klein from hyperlink import DecodedURL from collections_extended import RangeMap from twisted.internet.task import Clock, Cooperator +from twisted.internet.interfaces import IReactorTime +from twisted.internet.defer import CancelledError, Deferred from twisted.web import http from twisted.web.http_headers import Headers from werkzeug import routing @@ -245,6 +247,7 @@ def gen_bytes(length: int) -> bytes: class TestApp(object): """HTTP API for testing purposes.""" + clock: IReactorTime _app = Klein() _swissnum = SWISSNUM_FOR_TEST # Match what the test client is using @@ -266,6 +269,17 @@ class TestApp(object): """Return bytes to the given length using ``gen_bytes()``.""" return gen_bytes(length) + @_authorized_route(_app, set(), "/slowly_never_finish_result", methods=["GET"]) + def slowly_never_finish_result(self, request, authorization): + """ + Send data immediately, after 59 seconds, after another 59 seconds, and then + never again, without finishing the response. + """ + request.write(b"a") + self.clock.callLater(59, request.write, b"b") + self.clock.callLater(59 + 59, request.write, b"c") + return Deferred() + def result_of(d): """ @@ -299,6 +313,10 @@ class CustomHTTPServerTests(SyncTestCase): SWISSNUM_FOR_TEST, treq=StubTreq(self._http_server._app.resource()), ) + # We're using a Treq private API to get the reactor, alas, but only in + # a test, so not going to worry about it too much. This would be fixed + # if https://github.com/twisted/treq/issues/226 were ever fixed. + self._http_server.clock = self.client._treq._agent._memoryReactor def test_authorization_enforcement(self): """ @@ -367,6 +385,35 @@ class CustomHTTPServerTests(SyncTestCase): with self.assertRaises(ValueError): result_of(limited_content(response, too_short)) + def test_limited_content_silence_causes_timeout(self): + """ + ``http_client.limited_content() times out if it receives no data for 60 + seconds. + """ + response = result_of( + self.client.request( + "GET", + "http://127.0.0.1/slowly_never_finish_result", + ) + ) + + body_deferred = limited_content(response, 4, self._http_server.clock) + result = [] + error = [] + body_deferred.addCallbacks(result.append, error.append) + + for i in range(59 + 59 + 60): + self.assertEqual((result, error), ([], [])) + self._http_server.clock.advance(1) + # Push data between in-memory client and in-memory server: + self.client._treq._agent.flush() + + # After 59 (second write) + 59 (third write) + 60 seconds (quiescent + # timeout) the limited_content() response times out. + self.assertTrue(error) + with self.assertRaises(CancelledError): + error[0].raiseException() + class HttpTestFixture(Fixture): """ @@ -1441,7 +1488,9 @@ class SharedImmutableMutableTestsMixin: self.http.client.request( "GET", self.http.client.relative_url( - "/storage/v1/{}/{}/1".format(self.KIND, _encode_si(storage_index)) + "/storage/v1/{}/{}/1".format( + self.KIND, _encode_si(storage_index) + ) ), headers=headers, ) From f8b9607fc2c1062609eb3bcf42024ad7e81e729f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 7 Nov 2022 11:26:11 -0500 Subject: [PATCH 256/289] Finish up limited_content() timeout code. --- src/allmydata/storage/http_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 56f7aa629..c76cd00b9 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -164,6 +164,7 @@ def limited_content( d.addCallback(lambda _: treq.collect(response, collector)) def done(_): + timeout.cancel() collector.f.seek(0) return collector.f @@ -539,7 +540,7 @@ def read_share_chunk( raise ValueError("Server sent more than we asked for?!") # It might also send less than we asked for. That's (probably) OK, e.g. # if we went past the end of the file. - body = yield limited_content(response, supposed_length) + body = yield limited_content(response, supposed_length, client._clock) body.seek(0, SEEK_END) actual_length = body.tell() if actual_length != supposed_length: From 2c911eeac1901fbc333d550e6923d225e6ed07cb Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 7 Nov 2022 11:28:36 -0500 Subject: [PATCH 257/289] Make sure everything is using the same clock. --- src/allmydata/test/test_storage_http.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 55bc8f79a..3ee955c3b 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -311,16 +311,18 @@ class CustomHTTPServerTests(SyncTestCase): # Could be a fixture, but will only be used in this test class so not # going to bother: self._http_server = TestApp() + treq = StubTreq(self._http_server._app.resource()) self.client = StorageClient( DecodedURL.from_text("http://127.0.0.1"), SWISSNUM_FOR_TEST, - treq=StubTreq(self._http_server._app.resource()), - clock=Clock(), + treq=treq, + # We're using a Treq private API to get the reactor, alas, but only + # in a test, so not going to worry about it too much. This would be + # fixed if https://github.com/twisted/treq/issues/226 were ever + # fixed. + clock=treq._agent._memoryReactor, ) - # We're using a Treq private API to get the reactor, alas, but only in - # a test, so not going to worry about it too much. This would be fixed - # if https://github.com/twisted/treq/issues/226 were ever fixed. - self._http_server.clock = self.client._treq._agent._memoryReactor + self._http_server.clock = self.client._clock def test_authorization_enforcement(self): """ From afd4f52ff74d4d3f73258ec9ac27e1dea3a928e5 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 7 Nov 2022 11:32:14 -0500 Subject: [PATCH 258/289] News file. --- newsfragments/3940.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3940.minor diff --git a/newsfragments/3940.minor b/newsfragments/3940.minor new file mode 100644 index 000000000..e69de29bb From 65a7945fd9de23ad34c5f17bbf7cfe898243b9e2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 7 Nov 2022 11:39:45 -0500 Subject: [PATCH 259/289] Don't need a connection timeout since we have request-level timeouts. --- src/allmydata/storage/http_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index c76cd00b9..adf4eb7fa 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -343,8 +343,6 @@ class StorageClient(object): Agent( reactor, _StorageClientHTTPSPolicy(expected_spki_hash=certificate_hash), - # TCP-level connection timeout - connectTimeout=5, pool=pool, ) ) @@ -385,6 +383,8 @@ class StorageClient(object): If ``message_to_serialize`` is set, it will be serialized (by default with CBOR) and set as the request body. + + Default timeout is 60 seconds. """ headers = self._get_headers(headers) @@ -506,9 +506,9 @@ def read_share_chunk( share_type, _encode_si(storage_index), share_number ) ) - # The default timeout is for getting the response, so it doesn't include - # the time it takes to download the body... so we will will deal with that - # later. + # The default 60 second timeout is for getting the response, so it doesn't + # include the time it takes to download the body... so we will will deal + # with that later, via limited_content(). response = yield client.request( "GET", url, From 8d678fe3de4dacdf206e737ef130a91b92004656 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 7 Nov 2022 11:41:50 -0500 Subject: [PATCH 260/289] Increase timeout. --- src/allmydata/test/common_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index f47aad3b6..90990a8ca 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -667,7 +667,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) d.addBoth(lambda x: self.close_idle_http_connections().addCallback(lambda _: x)) - d.addBoth(lambda x: deferLater(reactor, 0.01, lambda: x)) + d.addBoth(lambda x: deferLater(reactor, 0.02, lambda: x)) return d def getdir(self, subdir): From 90f1eb6245d176bc2a9f32098be1971fb0857f51 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 8 Nov 2022 09:24:29 -0500 Subject: [PATCH 261/289] Fix the fURL and NURL links --- docs/proposed/http-storage-node-protocol.rst | 4 ++-- docs/specifications/url.rst | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index 8fe855be3..6643c08f2 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -30,11 +30,11 @@ Glossary introducer a Tahoe-LAFS process at a known location configured to re-publish announcements about the location of storage servers - `fURL `_ + :ref:`fURLs ` a self-authenticating URL-like string which can be used to locate a remote object using the Foolscap protocol (the storage service is an example of such an object) - `NURL `_ + :ref:`NURLs ` a self-authenticating URL-like string almost exactly like a fURL but without being tied to Foolscap swissnum diff --git a/docs/specifications/url.rst b/docs/specifications/url.rst index 421ac57f7..efc7ad76c 100644 --- a/docs/specifications/url.rst +++ b/docs/specifications/url.rst @@ -7,11 +7,11 @@ These are not to be confused with the URI-like capabilities Tahoe-LAFS uses to r An attempt is also made to outline the rationale for certain choices about these URLs. The intended audience for this document is Tahoe-LAFS maintainers and other developers interested in interoperating with Tahoe-LAFS or these URLs. +.. _furls: + Background ---------- -.. _fURLs: - Tahoe-LAFS first used Foolscap_ for network communication. Foolscap connection setup takes as an input a Foolscap URL or a *fURL*. A fURL includes three components: @@ -33,6 +33,8 @@ The client's use of the swissnum is what allows the server to authorize the clie .. _`swiss number`: http://wiki.erights.org/wiki/Swiss_number +.. _NURLs: + NURLs ----- From d1287df62990d7c096e1935718c2f048d1a2039d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 14:02:19 -0500 Subject: [PATCH 262/289] The short timeout should be specific to the storage client's needs. --- src/allmydata/storage/http_client.py | 5 +---- src/allmydata/storage_client.py | 8 ++++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index d6121aba2..d468d2436 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -415,10 +415,7 @@ class StorageClientGeneral(object): Return the version metadata for the server. """ url = self._client.relative_url("/storage/v1/version") - # 1. Getting the version should never take particularly long. - # 2. Clients rely on the version command for liveness checks of servers. - # Thus, a short timeout. - response = yield self._client.request("GET", url, timeout=5) + response = yield self._client.request("GET", url) decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"]) returnValue(decoded_response) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 6f2106f87..140e29607 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -944,12 +944,13 @@ class HTTPNativeStorageServer(service.MultiService): "connected". """ - def __init__(self, server_id: bytes, announcement): + def __init__(self, server_id: bytes, announcement, reactor=reactor): service.MultiService.__init__(self) assert isinstance(server_id, bytes) self._server_id = server_id self.announcement = announcement self._on_status_changed = ObserverList() + self._reactor = reactor furl = announcement["anonymous-storage-FURL"].encode("utf-8") ( self._nickname, @@ -1063,7 +1064,10 @@ class HTTPNativeStorageServer(service.MultiService): self._connect() def _connect(self): - return self._istorage_server.get_version().addCallbacks( + result = self._istorage_server.get_version() + # Set a short timeout since we're relying on this for server liveness. + result.addTimeout(5, self._reactor) + result.addCallbacks( self._got_version, self._failed_to_connect ) From 6c80ad5290c634a3395a3c5a222a15f6ed9f0abe Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 14:13:50 -0500 Subject: [PATCH 263/289] Not necessary. --- src/allmydata/storage/http_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index d468d2436..f0b45742c 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -301,8 +301,8 @@ class StorageClient(object): # ``StorageClient.from_nurl()``. _base_url: DecodedURL _swissnum: bytes - _treq: Union[treq, StubTreq, HTTPClient] = field(eq=False) - _clock: IReactorTime = field(eq=False) + _treq: Union[treq, StubTreq, HTTPClient] + _clock: IReactorTime @classmethod def from_nurl( From d700163aecda5ff23b772c561b5f9a1992b45f82 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 14:14:29 -0500 Subject: [PATCH 264/289] Remove no-longer-relevant comment. --- src/allmydata/storage/http_client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index f0b45742c..cc26d4b37 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -312,8 +312,6 @@ class StorageClient(object): ) -> StorageClient: """ Create a ``StorageClient`` for the given NURL. - - ``persistent`` indicates whether to use persistent HTTP connections. """ assert nurl.fragment == "v=1" assert nurl.scheme == "pb" From 4aeb62b66c12e5d337d6ebeeb26cea8f3f3ff13d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 14:16:41 -0500 Subject: [PATCH 265/289] Use a constant. --- src/allmydata/client.py | 2 +- src/allmydata/storage_client.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index aa03015fc..1e28bb98b 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -826,7 +826,7 @@ class _Client(node.Node, pollmixin.PollMixin): if hasattr(self.tub.negotiationClass, "add_storage_server"): nurls = self.tub.negotiationClass.add_storage_server(ss, swissnum.encode("ascii")) self.storage_nurls = nurls - announcement["anonymous-storage-NURLs"] = [n.to_text() for n in nurls] + announcement[storage_client.ANONYMOUS_STORAGE_NURLS] = [n.to_text() for n in nurls] announcement["anonymous-storage-FURL"] = furl enabled_storage_servers = self._enable_storage_servers( diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 140e29607..59d3406f1 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -80,6 +80,8 @@ from allmydata.storage.http_client import ( ReadVector, TestWriteVectors, WriteVector, TestVector, ClientException ) +ANONYMOUS_STORAGE_NURLS = "anonymous-storage-NURLs" + # who is responsible for de-duplication? # both? @@ -267,8 +269,7 @@ class StorageFarmBroker(service.MultiService): by the given announcement. """ assert isinstance(server_id, bytes) - # TODO use constant for anonymous-storage-NURLs - if len(server["ann"].get("anonymous-storage-NURLs", [])) > 0: + if len(server["ann"].get(ANONYMOUS_STORAGE_NURLS, [])) > 0: s = HTTPNativeStorageServer(server_id, server["ann"]) s.on_status_changed(lambda _: self._got_connection()) return s @@ -961,7 +962,7 @@ class HTTPNativeStorageServer(service.MultiService): ) = _parse_announcement(server_id, furl, announcement) # TODO need some way to do equivalent of Happy Eyeballs for multiple NURLs? # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3935 - nurl = DecodedURL.from_text(announcement["anonymous-storage-NURLs"][0]) + nurl = DecodedURL.from_text(announcement[ANONYMOUS_STORAGE_NURLS][0]) self._istorage_server = _HTTPStorageServer.from_http_client( StorageClient.from_nurl(nurl, reactor) ) From 8e4ac6903298e8081daf4d1947c569d02111d160 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 14:21:31 -0500 Subject: [PATCH 266/289] Stop test mode when done. --- src/allmydata/storage/http_client.py | 5 +++++ src/allmydata/test/common_system.py | 1 + src/allmydata/test/test_storage_http.py | 2 ++ 3 files changed, 8 insertions(+) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index cc26d4b37..fed66bb75 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -297,6 +297,11 @@ class StorageClient(object): """ cls.TEST_MODE_REGISTER_HTTP_POOL = callback + @classmethod + def stop_test_mode(cls): + """Stop testing mode.""" + cls.TEST_MODE_REGISTER_HTTP_POOL = None + # The URL is a HTTPS URL ("https://..."). To construct from a NURL, use # ``StorageClient.from_nurl()``. _base_url: DecodedURL diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 90990a8ca..af86440cc 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -649,6 +649,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def setUp(self): self._http_client_pools = [] http_client.StorageClient.start_test_mode(self._http_client_pools.append) + self.addCleanup(http_client.StorageClient.stop_test_mode) self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 25c21e03f..87a6a2306 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -294,6 +294,7 @@ class CustomHTTPServerTests(SyncTestCase): StorageClient.start_test_mode( lambda pool: self.addCleanup(pool.closeCachedConnections) ) + self.addCleanup(StorageClient.stop_test_mode) # Could be a fixture, but will only be used in this test class so not # going to bother: self._http_server = TestApp() @@ -382,6 +383,7 @@ class HttpTestFixture(Fixture): StorageClient.start_test_mode( lambda pool: self.addCleanup(pool.closeCachedConnections) ) + self.addCleanup(StorageClient.stop_test_mode) self.clock = Clock() self.tempdir = self.useFixture(TempDir()) # The global Cooperator used by Twisted (a) used by pull producers in From fb52b4d302d6f717a4393a518ddbe8fb773e406c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 14:22:30 -0500 Subject: [PATCH 267/289] Delete some garbage. --- src/allmydata/test/common_system.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index af86440cc..0c7d7f747 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -810,8 +810,6 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): if which in feature_matrix.get((section, feature), {which}): config.setdefault(section, {})[feature] = value - #config.setdefault("node", {})["force_foolscap"] = force_foolscap - setnode = partial(setconf, config, which, "node") sethelper = partial(setconf, config, which, "helper") From f3fc4268309316e9200f251df64b27a7bca5f33e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 14:36:14 -0500 Subject: [PATCH 268/289] Switch to [storage] force_foolscap. --- src/allmydata/client.py | 1 + src/allmydata/node.py | 3 +-- src/allmydata/test/common_system.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 1e28bb98b..1a158a1aa 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -104,6 +104,7 @@ _client_config = configutil.ValidConfiguration( "reserved_space", "storage_dir", "plugins", + "force_foolscap", ), "sftpd": ( "accounts.file", diff --git a/src/allmydata/node.py b/src/allmydata/node.py index f572cf7d9..8266fe3fb 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -64,7 +64,6 @@ def _common_valid_config(): "tcp", ), "node": ( - "force_foolscap", "log_gatherer.furl", "nickname", "reveal-ip-address", @@ -916,7 +915,7 @@ def create_main_tub(config, tub_options, # don't want to enable HTTP by default. # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3934 force_foolscap=config.get_config( - "node", "force_foolscap", default=True, boolean=True + "storage", "force_foolscap", default=True, boolean=True ), handler_overrides=handler_overrides, certFile=certfile, diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 0c7d7f747..d49e7831d 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -814,7 +814,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): sethelper = partial(setconf, config, which, "helper") setnode("nickname", u"client %d \N{BLACK SMILING FACE}" % (which,)) - setnode("force_foolscap", str(force_foolscap)) + setconf(config, which, "storage", "force_foolscap", str(force_foolscap)) tub_location_hint, tub_port_endpoint = self.port_assigner.assign(reactor) setnode("tub.port", tub_port_endpoint) From 2a5e8e59715ec647387f77b83733d9541886544b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 15 Nov 2022 15:02:15 -0500 Subject: [PATCH 269/289] Better cleanup. --- src/allmydata/storage_client.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 59d3406f1..8e9ad3656 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -970,6 +970,7 @@ class HTTPNativeStorageServer(service.MultiService): self._connection_status = connection_status.ConnectionStatus.unstarted() self._version = None self._last_connect_time = None + self._connecting_deferred = None def get_permutation_seed(self): return self._permutation_seed @@ -1060,20 +1061,30 @@ class HTTPNativeStorageServer(service.MultiService): def stop_connecting(self): self._lc.stop() + if self._connecting_deferred is not None: + self._connecting_deferred.cancel() def try_to_connect(self): self._connect() def _connect(self): result = self._istorage_server.get_version() + + def remove_connecting_deferred(result): + self._connecting_deferred = None + return result + # Set a short timeout since we're relying on this for server liveness. - result.addTimeout(5, self._reactor) - result.addCallbacks( + self._connecting_deferred = result.addTimeout(5, self._reactor).addBoth( + remove_connecting_deferred).addCallbacks( self._got_version, self._failed_to_connect ) def stopService(self): + if self._connecting_deferred is not None: + self._connecting_deferred.cancel() + result = service.MultiService.stopService(self) if self._lc.running: self._lc.stop() From a20943e10c7d1f4b30b383138f489e9c9dd1eb85 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 16 Nov 2022 09:33:01 -0500 Subject: [PATCH 270/289] As an experiment, see if this fixes failing CI. --- src/allmydata/test/common_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index d49e7831d..8bc25aacf 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -649,7 +649,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def setUp(self): self._http_client_pools = [] http_client.StorageClient.start_test_mode(self._http_client_pools.append) - self.addCleanup(http_client.StorageClient.stop_test_mode) + #self.addCleanup(http_client.StorageClient.stop_test_mode) self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) From 9f5f287473d734932f348d77b89fb81838e5c3d1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 16 Nov 2022 09:57:39 -0500 Subject: [PATCH 271/289] Nope, not helpful. --- src/allmydata/test/common_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 8bc25aacf..d49e7831d 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -649,7 +649,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def setUp(self): self._http_client_pools = [] http_client.StorageClient.start_test_mode(self._http_client_pools.append) - #self.addCleanup(http_client.StorageClient.stop_test_mode) + self.addCleanup(http_client.StorageClient.stop_test_mode) self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) From 2ab172ffca9c6faac1751709ce5db5d17e4e28db Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 16 Nov 2022 10:26:29 -0500 Subject: [PATCH 272/289] Try to set more aggressive timeouts when testing. --- src/allmydata/test/common_system.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index d49e7831d..e75021248 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -648,7 +648,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def setUp(self): self._http_client_pools = [] - http_client.StorageClient.start_test_mode(self._http_client_pools.append) + http_client.StorageClient.start_test_mode(self._got_new_http_connection_pool) self.addCleanup(http_client.StorageClient.stop_test_mode) self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() @@ -657,6 +657,23 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): self.sparent = service.MultiService() self.sparent.startService() + def _got_new_http_connection_pool(self, pool): + # Register the pool for shutdown later: + self._http_client_pools.append(pool) + # Disable retries: + pool.retryAutomatically = False + # Make a much more aggressive timeout for connections, we're connecting + # locally after all... and also make sure it's lower than the delay we + # add in tearDown, to prevent dirty reactor issues. + getConnection = pool.getConnection + + def getConnectionWithTimeout(*args, **kwargs): + d = getConnection(*args, **kwargs) + d.addTimeout(0.05, reactor) + return d + + pool.getConnection = getConnectionWithTimeout + def close_idle_http_connections(self): """Close all HTTP client connections that are just hanging around.""" return defer.gatherResults( @@ -668,7 +685,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) d.addBoth(lambda x: self.close_idle_http_connections().addCallback(lambda _: x)) - d.addBoth(lambda x: deferLater(reactor, 0.02, lambda: x)) + d.addBoth(lambda x: deferLater(reactor, 0.1, lambda: x)) return d def getdir(self, subdir): From 35317373474c5170d9a15b5b9cd895ceb7222391 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 16 Nov 2022 10:36:11 -0500 Subject: [PATCH 273/289] Make timeouts less aggressive, CI machines are slow? --- src/allmydata/test/common_system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index e75021248..8d3019935 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -669,7 +669,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def getConnectionWithTimeout(*args, **kwargs): d = getConnection(*args, **kwargs) - d.addTimeout(0.05, reactor) + d.addTimeout(1, reactor) return d pool.getConnection = getConnectionWithTimeout @@ -685,7 +685,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) d.addBoth(lambda x: self.close_idle_http_connections().addCallback(lambda _: x)) - d.addBoth(lambda x: deferLater(reactor, 0.1, lambda: x)) + d.addBoth(lambda x: deferLater(reactor, 2, lambda: x)) return d def getdir(self, subdir): From 097d918a240ba291ebd6b00108f071362eefcbd3 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 16 Nov 2022 13:37:50 -0500 Subject: [PATCH 274/289] Sigh --- src/allmydata/test/test_storage_https.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/test/test_storage_https.py b/src/allmydata/test/test_storage_https.py index bacb40290..a9421c3e5 100644 --- a/src/allmydata/test/test_storage_https.py +++ b/src/allmydata/test/test_storage_https.py @@ -179,6 +179,10 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") + # We keep getting TLSMemoryBIOProtocol being left around, so try harder + # to wait for it to finish. + await deferLater(reactor, 0.01) + @async_to_deferred async def test_server_certificate_not_valid_yet(self): """ From d182a2f1865002cc9a3167c1f585413ac6db4307 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 17 Nov 2022 11:01:12 -0500 Subject: [PATCH 275/289] Add the delay to appropriate test. --- src/allmydata/test/test_storage_https.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/test_storage_https.py b/src/allmydata/test/test_storage_https.py index a9421c3e5..01431267f 100644 --- a/src/allmydata/test/test_storage_https.py +++ b/src/allmydata/test/test_storage_https.py @@ -144,6 +144,10 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") + # We keep getting TLSMemoryBIOProtocol being left around, so try harder + # to wait for it to finish. + await deferLater(reactor, 0.01) + @async_to_deferred async def test_server_certificate_has_wrong_hash(self): """ @@ -179,10 +183,6 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") - # We keep getting TLSMemoryBIOProtocol being left around, so try harder - # to wait for it to finish. - await deferLater(reactor, 0.01) - @async_to_deferred async def test_server_certificate_not_valid_yet(self): """ From 9b21f1da90a1b80414959822fec689040db75d40 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 17 Nov 2022 11:35:10 -0500 Subject: [PATCH 276/289] Increase how many statuses are stored. --- src/allmydata/history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/history.py b/src/allmydata/history.py index b5cfb7318..06a22ab5d 100644 --- a/src/allmydata/history.py +++ b/src/allmydata/history.py @@ -20,7 +20,7 @@ class History(object): MAX_UPLOAD_STATUSES = 10 MAX_MAPUPDATE_STATUSES = 20 MAX_PUBLISH_STATUSES = 20 - MAX_RETRIEVE_STATUSES = 20 + MAX_RETRIEVE_STATUSES = 40 def __init__(self, stats_provider=None): self.stats_provider = stats_provider From 4c0c75a034568c621ca327b00e881075743254c5 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 18 Nov 2022 13:56:54 -0500 Subject: [PATCH 277/289] Fix DelayedCall leak in tests. --- src/allmydata/storage/http_client.py | 38 ++++++++++++++++--------- src/allmydata/test/test_storage_http.py | 8 ++++-- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 4ed37f901..5b4ec9db8 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -26,7 +26,6 @@ from twisted.internet.interfaces import ( IDelayedCall, ) from twisted.internet.ssl import CertificateOptions -from twisted.internet import reactor from twisted.web.client import Agent, HTTPConnectionPool from zope.interface import implementer from hyperlink import DecodedURL @@ -141,7 +140,9 @@ class _LengthLimitedCollector: def limited_content( - response, max_length: int = 30 * 1024 * 1024, clock: IReactorTime = reactor + response, + clock: IReactorTime, + max_length: int = 30 * 1024 * 1024, ) -> Deferred[BinaryIO]: """ Like ``treq.content()``, but limit data read from the response to a set @@ -168,11 +169,10 @@ def limited_content( collector.f.seek(0) return collector.f - d.addCallback(done) - return d + return d.addCallback(done) -def _decode_cbor(response, schema: Schema): +def _decode_cbor(response, schema: Schema, clock: IReactorTime): """Given HTTP response, return decoded CBOR body.""" def got_content(f: BinaryIO): @@ -183,7 +183,7 @@ def _decode_cbor(response, schema: Schema): if response.code > 199 and response.code < 300: content_type = get_content_type(response.headers) if content_type == CBOR_MIME_TYPE: - return limited_content(response).addCallback(got_content) + return limited_content(response, clock).addCallback(got_content) else: raise ClientException(-1, "Server didn't send CBOR") else: @@ -439,7 +439,9 @@ class StorageClientGeneral(object): """ url = self._client.relative_url("/storage/v1/version") response = yield self._client.request("GET", url) - decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"]) + decoded_response = yield _decode_cbor( + response, _SCHEMAS["get_version"], self._client._clock + ) returnValue(decoded_response) @inlineCallbacks @@ -540,7 +542,7 @@ def read_share_chunk( raise ValueError("Server sent more than we asked for?!") # It might also send less than we asked for. That's (probably) OK, e.g. # if we went past the end of the file. - body = yield limited_content(response, supposed_length, client._clock) + body = yield limited_content(response, client._clock, supposed_length) body.seek(0, SEEK_END) actual_length = body.tell() if actual_length != supposed_length: @@ -627,7 +629,9 @@ class StorageClientImmutables(object): upload_secret=upload_secret, message_to_serialize=message, ) - decoded_response = yield _decode_cbor(response, _SCHEMAS["allocate_buckets"]) + decoded_response = yield _decode_cbor( + response, _SCHEMAS["allocate_buckets"], self._client._clock + ) returnValue( ImmutableCreateResult( already_have=decoded_response["already-have"], @@ -703,7 +707,9 @@ class StorageClientImmutables(object): raise ClientException( response.code, ) - body = yield _decode_cbor(response, _SCHEMAS["immutable_write_share_chunk"]) + body = yield _decode_cbor( + response, _SCHEMAS["immutable_write_share_chunk"], self._client._clock + ) remaining = RangeMap() for chunk in body["required"]: remaining.set(True, chunk["begin"], chunk["end"]) @@ -732,7 +738,9 @@ class StorageClientImmutables(object): url, ) if response.code == http.OK: - body = yield _decode_cbor(response, _SCHEMAS["list_shares"]) + body = yield _decode_cbor( + response, _SCHEMAS["list_shares"], self._client._clock + ) returnValue(set(body)) else: raise ClientException(response.code) @@ -849,7 +857,9 @@ class StorageClientMutables: message_to_serialize=message, ) if response.code == http.OK: - result = await _decode_cbor(response, _SCHEMAS["mutable_read_test_write"]) + result = await _decode_cbor( + response, _SCHEMAS["mutable_read_test_write"], self._client._clock + ) return ReadTestWriteResult(success=result["success"], reads=result["data"]) else: raise ClientException(response.code, (await response.content())) @@ -878,7 +888,9 @@ class StorageClientMutables: ) response = await self._client.request("GET", url) if response.code == http.OK: - return await _decode_cbor(response, _SCHEMAS["mutable_list_shares"]) + return await _decode_cbor( + response, _SCHEMAS["mutable_list_shares"], self._client._clock + ) else: raise ClientException(response.code) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index fa3532839..4f7174c06 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -371,7 +371,9 @@ class CustomHTTPServerTests(SyncTestCase): ) self.assertEqual( - result_of(limited_content(response, at_least_length)).read(), + result_of( + limited_content(response, self._http_server.clock, at_least_length) + ).read(), gen_bytes(length), ) @@ -390,7 +392,7 @@ class CustomHTTPServerTests(SyncTestCase): ) with self.assertRaises(ValueError): - result_of(limited_content(response, too_short)) + result_of(limited_content(response, self._http_server.clock, too_short)) def test_limited_content_silence_causes_timeout(self): """ @@ -404,7 +406,7 @@ class CustomHTTPServerTests(SyncTestCase): ) ) - body_deferred = limited_content(response, 4, self._http_server.clock) + body_deferred = limited_content(response, self._http_server.clock, 4) result = [] error = [] body_deferred.addCallbacks(result.append, error.append) From 8cfdae2ab4005943689a0713ba5bd8f3b0831d9b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 18 Nov 2022 15:26:02 -0500 Subject: [PATCH 278/289] sigh --- src/allmydata/test/test_storage_https.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/allmydata/test/test_storage_https.py b/src/allmydata/test/test_storage_https.py index 01431267f..062eb5b0e 100644 --- a/src/allmydata/test/test_storage_https.py +++ b/src/allmydata/test/test_storage_https.py @@ -183,6 +183,10 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") + # We keep getting TLSMemoryBIOProtocol being left around, so try harder + # to wait for it to finish. + await deferLater(reactor, 0.01) + @async_to_deferred async def test_server_certificate_not_valid_yet(self): """ From 3a613aee704d0d4231f70e82d46bcaed84960692 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 21 Nov 2022 12:24:50 -0500 Subject: [PATCH 279/289] Try a different approach to timeouts: dynamic, instead of hardcoded. --- src/allmydata/test/common_system.py | 30 +++++++++++++++++++++++- src/allmydata/test/test_storage_https.py | 21 ++++------------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 8d3019935..a6b239005 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -20,6 +20,7 @@ from foolscap.api import flushEventualQueue from allmydata import client from allmydata.introducer.server import create_introducer from allmydata.util import fileutil, log, pollmixin +from allmydata.util.deferredutil import async_to_deferred from allmydata.storage import http_client from allmydata.storage_client import ( NativeStorageServer, @@ -639,6 +640,33 @@ def _render_section_values(values): )) +@async_to_deferred +async def spin_until_cleanup_done(value=None, timeout=10): + """ + At the end of the test, spin until either a timeout is hit, or the reactor + has no more DelayedCalls. + + Make sure to register during setUp. + """ + def num_fds(): + if hasattr(reactor, "handles"): + # IOCP! + return len(reactor.handles) + else: + # Normal reactor + return len([r for r in reactor.getReaders() + if r.__class__.__name__ not in ("_UnixWaker", "_SIGCHLDWaker")] + ) + len(reactor.getWriters()) + + for i in range(timeout * 1000): + # There's a single DelayedCall for AsynchronousDeferredRunTest's + # timeout... + if (len(reactor.getDelayedCalls()) < 2 and num_fds() == 0): + break + await deferLater(reactor, 0.001) + return value + + class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): # If set to True, use Foolscap for storage protocol. If set to False, HTTP @@ -685,7 +713,7 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): d = self.sparent.stopService() d.addBoth(flush_but_dont_ignore) d.addBoth(lambda x: self.close_idle_http_connections().addCallback(lambda _: x)) - d.addBoth(lambda x: deferLater(reactor, 2, lambda: x)) + d.addBoth(spin_until_cleanup_done) return d def getdir(self, subdir): diff --git a/src/allmydata/test/test_storage_https.py b/src/allmydata/test/test_storage_https.py index 062eb5b0e..284c8cda8 100644 --- a/src/allmydata/test/test_storage_https.py +++ b/src/allmydata/test/test_storage_https.py @@ -12,7 +12,6 @@ from cryptography import x509 from twisted.internet.endpoints import serverFromString from twisted.internet import reactor -from twisted.internet.task import deferLater from twisted.web.server import Site from twisted.web.static import Data from twisted.web.client import Agent, HTTPConnectionPool, ResponseNeverReceived @@ -30,6 +29,7 @@ from ..storage.http_common import get_spki_hash from ..storage.http_client import _StorageClientHTTPSPolicy from ..storage.http_server import _TLSEndpointWrapper from ..util.deferredutil import async_to_deferred +from .common_system import spin_until_cleanup_done class HTTPSNurlTests(SyncTestCase): @@ -87,6 +87,10 @@ class PinningHTTPSValidation(AsyncTestCase): self.addCleanup(self._port_assigner.tearDown) return AsyncTestCase.setUp(self) + def tearDown(self): + AsyncTestCase.tearDown(self) + return spin_until_cleanup_done() + @asynccontextmanager async def listen(self, private_key_path: FilePath, cert_path: FilePath): """ @@ -107,9 +111,6 @@ class PinningHTTPSValidation(AsyncTestCase): yield f"https://127.0.0.1:{listening_port.getHost().port}/" finally: await listening_port.stopListening() - # Make sure all server connections are closed :( No idea why this - # is necessary when it's not for IStorageServer HTTPS tests. - await deferLater(reactor, 0.01) def request(self, url: str, expected_certificate: x509.Certificate): """ @@ -144,10 +145,6 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") - # We keep getting TLSMemoryBIOProtocol being left around, so try harder - # to wait for it to finish. - await deferLater(reactor, 0.01) - @async_to_deferred async def test_server_certificate_has_wrong_hash(self): """ @@ -183,10 +180,6 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") - # We keep getting TLSMemoryBIOProtocol being left around, so try harder - # to wait for it to finish. - await deferLater(reactor, 0.01) - @async_to_deferred async def test_server_certificate_not_valid_yet(self): """ @@ -206,10 +199,6 @@ class PinningHTTPSValidation(AsyncTestCase): response = await self.request(url, certificate) self.assertEqual(await response.content(), b"YOYODYNE") - # We keep getting TLSMemoryBIOProtocol being left around, so try harder - # to wait for it to finish. - await deferLater(reactor, 0.001) - # A potential attack to test is a private key that doesn't match the # certificate... but OpenSSL (quite rightly) won't let you listen with that # so I don't know how to test that! See From c80469b50bd6f97d98ab22b48ac4b6481020a1df Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 22 Nov 2022 11:55:56 -0500 Subject: [PATCH 280/289] Handle the Windows waker too. --- src/allmydata/test/common_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index a6b239005..ca2904b53 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -655,7 +655,7 @@ async def spin_until_cleanup_done(value=None, timeout=10): else: # Normal reactor return len([r for r in reactor.getReaders() - if r.__class__.__name__ not in ("_UnixWaker", "_SIGCHLDWaker")] + if r.__class__.__name__ not in ("_UnixWaker", "_SIGCHLDWaker", "_SocketWaker")] ) + len(reactor.getWriters()) for i in range(timeout * 1000): From 62400d29b3819994e6c777f77cd0ec7e3ecb5def Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 09:36:53 -0500 Subject: [PATCH 281/289] Seems like Ubuntu 22.04 has issues with Tor at the moment --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0327014ca..ad055da2f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -163,7 +163,7 @@ jobs: matrix: os: - windows-latest - - ubuntu-latest + - ubuntu-20.04 python-version: - 3.7 - 3.9 From 4fd92a915bb312a2e2bf4f185112570b8d32d393 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 09:43:45 -0500 Subject: [PATCH 282/289] Install tor on any ubuntu version. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad055da2f..4e5c9a757 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -175,7 +175,7 @@ jobs: steps: - name: Install Tor [Ubuntu] - if: matrix.os == 'ubuntu-latest' + if: ${{ contains(matrix.os, 'ubuntu') }} run: sudo apt install tor # TODO: See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3744. From 7f1d7d4f46847ea83a78d85dea649b45d78583dd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 09:53:07 -0500 Subject: [PATCH 283/289] Better explanation. --- src/allmydata/test/common_system.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index ca2904b53..297046cc5 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -643,10 +643,16 @@ def _render_section_values(values): @async_to_deferred async def spin_until_cleanup_done(value=None, timeout=10): """ - At the end of the test, spin until either a timeout is hit, or the reactor - has no more DelayedCalls. + At the end of the test, spin until the reactor has no more DelayedCalls + and file descriptors (or equivalents) registered. This prevents dirty + reactor errors, while also not hard-coding a fixed amount of time, so it + can finish faster on faster computers. - Make sure to register during setUp. + There is also a timeout: if it takes more than 10 seconds (by default) for + the remaining reactor state to clean itself up, the presumption is that it + will never get cleaned up and the spinning stops. + + Make sure to run as last thing in tearDown. """ def num_fds(): if hasattr(reactor, "handles"): From 6c3e9e670de208e7b5d2dc37d192de2c3d464e80 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 09:53:11 -0500 Subject: [PATCH 284/289] Link to issue. --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4e5c9a757..99ac28926 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -163,6 +163,8 @@ jobs: matrix: os: - windows-latest + # 22.04 has some issue with Tor at the moment: + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3943 - ubuntu-20.04 python-version: - 3.7 From 2ab8e3e8d20087521dc4aa7ffb358e3f65a7a6aa Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 28 Nov 2022 10:02:56 -0500 Subject: [PATCH 285/289] Cancel timeout on failures too. --- src/allmydata/storage/http_client.py | 7 ++++++- src/allmydata/test/test_storage_http.py | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 5b4ec9db8..73fba9888 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -169,7 +169,12 @@ def limited_content( collector.f.seek(0) return collector.f - return d.addCallback(done) + def failed(f): + if timeout.active(): + timeout.cancel() + return f + + return d.addCallbacks(done, failed) def _decode_cbor(response, schema: Schema, clock: IReactorTime): diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 4f7174c06..8dbe18545 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -280,6 +280,14 @@ class TestApp(object): self.clock.callLater(59 + 59, request.write, b"c") return Deferred() + @_authorized_route(_app, set(), "/die_unfinished", methods=["GET"]) + def die(self, request, authorization): + """ + Dies half-way. + """ + request.transport.loseConnection() + return Deferred() + def result_of(d): """ @@ -423,6 +431,22 @@ class CustomHTTPServerTests(SyncTestCase): with self.assertRaises(CancelledError): error[0].raiseException() + def test_limited_content_cancels_timeout_on_failed_response(self): + """ + If the response fails somehow, the timeout is still cancelled. + """ + response = result_of( + self.client.request( + "GET", + "http://127.0.0.1/die", + ) + ) + + d = limited_content(response, self._http_server.clock, 4) + with self.assertRaises(ValueError): + result_of(d) + self.assertEqual(len(self._http_server.clock.getDelayedCalls()), 0) + class HttpTestFixture(Fixture): """ From 38d7430c570fd3ff9b2b3ea720706d6d3198fbfa Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 28 Nov 2022 10:03:42 -0500 Subject: [PATCH 286/289] Simplify. --- src/allmydata/storage/http_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 73fba9888..5abc44bdd 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -382,7 +382,7 @@ class StorageClient(object): write_enabler_secret=None, headers=None, message_to_serialize=None, - timeout: Union[int, float] = 60, + timeout: float = 60, **kwargs, ): """ From 0f4dc9129538dbbe8b88073c3a5047462f4209a2 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 28 Nov 2022 10:12:08 -0500 Subject: [PATCH 287/289] Refactor so internal attributes needn't leak. --- src/allmydata/storage/http_client.py | 63 +++++++++++++--------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 5abc44bdd..79bf061c9 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -177,26 +177,6 @@ def limited_content( return d.addCallbacks(done, failed) -def _decode_cbor(response, schema: Schema, clock: IReactorTime): - """Given HTTP response, return decoded CBOR body.""" - - def got_content(f: BinaryIO): - data = f.read() - schema.validate_cbor(data) - return loads(data) - - if response.code > 199 and response.code < 300: - content_type = get_content_type(response.headers) - if content_type == CBOR_MIME_TYPE: - return limited_content(response, clock).addCallback(got_content) - else: - raise ClientException(-1, "Server didn't send CBOR") - else: - return treq.content(response).addCallback( - lambda data: fail(ClientException(response.code, response.phrase, data)) - ) - - @define class ImmutableCreateResult(object): """Result of creating a storage index for an immutable.""" @@ -428,6 +408,25 @@ class StorageClient(object): method, url, headers=headers, timeout=timeout, **kwargs ) + def decode_cbor(self, response, schema: Schema): + """Given HTTP response, return decoded CBOR body.""" + + def got_content(f: BinaryIO): + data = f.read() + schema.validate_cbor(data) + return loads(data) + + if response.code > 199 and response.code < 300: + content_type = get_content_type(response.headers) + if content_type == CBOR_MIME_TYPE: + return limited_content(response, self._clock).addCallback(got_content) + else: + raise ClientException(-1, "Server didn't send CBOR") + else: + return treq.content(response).addCallback( + lambda data: fail(ClientException(response.code, response.phrase, data)) + ) + @define(hash=True) class StorageClientGeneral(object): @@ -444,8 +443,8 @@ class StorageClientGeneral(object): """ url = self._client.relative_url("/storage/v1/version") response = yield self._client.request("GET", url) - decoded_response = yield _decode_cbor( - response, _SCHEMAS["get_version"], self._client._clock + decoded_response = yield self._client.decode_cbor( + response, _SCHEMAS["get_version"] ) returnValue(decoded_response) @@ -634,8 +633,8 @@ class StorageClientImmutables(object): upload_secret=upload_secret, message_to_serialize=message, ) - decoded_response = yield _decode_cbor( - response, _SCHEMAS["allocate_buckets"], self._client._clock + decoded_response = yield self._client.decode_cbor( + response, _SCHEMAS["allocate_buckets"] ) returnValue( ImmutableCreateResult( @@ -712,8 +711,8 @@ class StorageClientImmutables(object): raise ClientException( response.code, ) - body = yield _decode_cbor( - response, _SCHEMAS["immutable_write_share_chunk"], self._client._clock + body = yield self._client.decode_cbor( + response, _SCHEMAS["immutable_write_share_chunk"] ) remaining = RangeMap() for chunk in body["required"]: @@ -743,9 +742,7 @@ class StorageClientImmutables(object): url, ) if response.code == http.OK: - body = yield _decode_cbor( - response, _SCHEMAS["list_shares"], self._client._clock - ) + body = yield self._client.decode_cbor(response, _SCHEMAS["list_shares"]) returnValue(set(body)) else: raise ClientException(response.code) @@ -862,8 +859,8 @@ class StorageClientMutables: message_to_serialize=message, ) if response.code == http.OK: - result = await _decode_cbor( - response, _SCHEMAS["mutable_read_test_write"], self._client._clock + result = await self._client.decode_cbor( + response, _SCHEMAS["mutable_read_test_write"] ) return ReadTestWriteResult(success=result["success"], reads=result["data"]) else: @@ -893,8 +890,8 @@ class StorageClientMutables: ) response = await self._client.request("GET", url) if response.code == http.OK: - return await _decode_cbor( - response, _SCHEMAS["mutable_list_shares"], self._client._clock + return await self._client.decode_cbor( + response, _SCHEMAS["mutable_list_shares"] ) else: raise ClientException(response.code) From 3ba166c2cb939a58fdb16dad06cd0dbd1ad39961 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 28 Nov 2022 10:20:12 -0500 Subject: [PATCH 288/289] A bit more robust code. --- src/allmydata/test/common_system.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 297046cc5..01966824a 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -659,10 +659,11 @@ async def spin_until_cleanup_done(value=None, timeout=10): # IOCP! return len(reactor.handles) else: - # Normal reactor - return len([r for r in reactor.getReaders() - if r.__class__.__name__ not in ("_UnixWaker", "_SIGCHLDWaker", "_SocketWaker")] - ) + len(reactor.getWriters()) + # Normal reactor; having internal readers still registered is fine, + # that's not our code. + return len( + set(reactor.getReaders()) - set(reactor._internalReaders) + ) + len(reactor.getWriters()) for i in range(timeout * 1000): # There's a single DelayedCall for AsynchronousDeferredRunTest's From aa80c9ef4748cf10e3b448b298df8b589c35cafd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 28 Nov 2022 10:21:59 -0500 Subject: [PATCH 289/289] Be more robust. --- src/allmydata/test/test_storage_https.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_storage_https.py b/src/allmydata/test/test_storage_https.py index 284c8cda8..a11b0eed5 100644 --- a/src/allmydata/test/test_storage_https.py +++ b/src/allmydata/test/test_storage_https.py @@ -12,6 +12,7 @@ from cryptography import x509 from twisted.internet.endpoints import serverFromString from twisted.internet import reactor +from twisted.internet.defer import maybeDeferred from twisted.web.server import Site from twisted.web.static import Data from twisted.web.client import Agent, HTTPConnectionPool, ResponseNeverReceived @@ -88,8 +89,8 @@ class PinningHTTPSValidation(AsyncTestCase): return AsyncTestCase.setUp(self) def tearDown(self): - AsyncTestCase.tearDown(self) - return spin_until_cleanup_done() + d = maybeDeferred(AsyncTestCase.tearDown, self) + return d.addCallback(lambda _: spin_until_cleanup_done()) @asynccontextmanager async def listen(self, private_key_path: FilePath, cert_path: FilePath):