From 488a04cb9b8af59171f02908fb4d6d00474865c9 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Sep 2022 17:42:06 -0600 Subject: [PATCH 01/60] exit when stdin closes --- src/allmydata/scripts/tahoe_run.py | 47 ++++++++++++++++++++++++++-- src/allmydata/test/test_runner.py | 49 ++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 51be32ee3..68578a2a1 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -20,7 +20,9 @@ from allmydata.scripts.common import BasedirOptions from twisted.scripts import twistd from twisted.python import usage from twisted.python.reflect import namedAny -from twisted.internet.defer import maybeDeferred +from twisted.internet.defer import maybeDeferred, Deferred +from twisted.internet.protocol import Protocol +from twisted.internet.stdio import StandardIO from twisted.application.service import Service from allmydata.scripts.default_nodedir import _default_nodedir @@ -148,6 +150,8 @@ class DaemonizeTheRealService(Service, HookMixin): def startService(self): + from twisted.internet import reactor + def start(): node_to_instance = { u"client": lambda: maybeDeferred(namedAny("allmydata.client.create_client"), self.basedir), @@ -187,12 +191,14 @@ class DaemonizeTheRealService(Service, HookMixin): def created(srv): srv.setServiceParent(self.parent) + # exiting on stdin-closed facilitates cleanup when run + # as a subprocess + on_stdin_close(reactor, reactor.stop) d.addCallback(created) d.addErrback(handle_config_error) d.addBoth(self._call_hook, 'running') return d - from twisted.internet import reactor reactor.callWhenRunning(start) @@ -206,6 +212,43 @@ class DaemonizeTahoeNodePlugin(object): return DaemonizeTheRealService(self.nodetype, self.basedir, so) +def on_stdin_close(reactor, fn): + """ + Arrange for the function `fn` to run when our stdin closes + """ + when_closed_d = Deferred() + + class WhenClosed(Protocol): + """ + Notify a Deferred when our connection is lost .. as this is passed + to twisted's StandardIO class, it is used to detect our parent + going away. + """ + + def connectionLost(self, reason): + when_closed_d.callback(None) + + def on_close(arg): + try: + fn() + except Exception: + # for our "exit" use-case, this will _mostly_ just be + # ReactorNotRunning (because we're already shutting down + # when our stdin closes) but no matter what "bad thing" + # happens we just want to ignore it. + pass + return arg + + when_closed_d.addBoth(on_close) + # we don't need to do anything with this instance because it gets + # hooked into the reactor and thus remembered + StandardIO( + proto=WhenClosed(), + reactor=reactor, + ) + return None + + def run(config, runApp=twistd.runApp): """ Runs a Tahoe-LAFS node in the foreground. 
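An aside on the helper added above, as an illustrative sketch rather than part of the patch: `on_stdin_close` hooks a throwaway `Protocol` into Twisted's `StandardIO`, so the supplied callback fires from `connectionLost` once the parent process goes away and stdin closes. Assuming the patched `allmydata.scripts.tahoe_run` module is importable, the intended usage looks roughly like this:

```
# Sketch only: stop the reactor when our parent closes stdin, mirroring the
# on_stdin_close(reactor, reactor.stop) call wired into DaemonizeTheRealService.
from twisted.internet import reactor
from allmydata.scripts.tahoe_run import on_stdin_close

def shutdown():
    # May raise ReactorNotRunning if shutdown is already underway; the helper
    # deliberately ignores any exception raised by this callback.
    reactor.stop()

on_stdin_close(reactor, shutdown)
reactor.run()
```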
diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 3eb6b8a34..fdd31c37d 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -46,6 +46,9 @@ from twisted.internet.defer import ( inlineCallbacks, DeferredList, ) +from twisted.internet.testing import ( + MemoryReactorClock, +) from twisted.python.filepath import FilePath from twisted.python.runtime import ( platform, @@ -57,6 +60,9 @@ import allmydata from allmydata.scripts.runner import ( parse_options, ) +from allmydata.scripts.tahoe_run import ( + on_stdin_close, +) from .common import ( PIPE, @@ -621,3 +627,46 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): # What's left is a perfect indicator that the process has exited and # we won't get blamed for leaving the reactor dirty. yield client_running + + +class OnStdinCloseTests(SyncTestCase): + """ + Tests for on_stdin_close + """ + + def test_close_called(self): + """ + our on-close method is called when stdin closes + """ + reactor = MemoryReactorClock() + called = [] + + def onclose(): + called.append(True) + on_stdin_close(reactor, onclose) + self.assertEqual(called, []) + + reader = list(reactor.readers)[0] + reader.loseConnection() + reactor.advance(1) # ProcessReader does a callLater(0, ..) + + self.assertEqual(called, [True]) + + def test_exception_ignored(self): + """ + an exception from or on-close function is ignored + """ + reactor = MemoryReactorClock() + called = [] + + def onclose(): + called.append(True) + raise RuntimeError("unexpected error") + on_stdin_close(reactor, onclose) + self.assertEqual(called, []) + + reader = list(reactor.readers)[0] + reader.loseConnection() + reactor.advance(1) # ProcessReader does a callLater(0, ..) + + self.assertEqual(called, [True]) From 1e6381ca7f45bea90dc3518abd7ad0c0f79d7670 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Sep 2022 17:44:01 -0600 Subject: [PATCH 02/60] news --- newsfragments/3921.feature | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 newsfragments/3921.feature diff --git a/newsfragments/3921.feature b/newsfragments/3921.feature new file mode 100644 index 000000000..f2c3a98bd --- /dev/null +++ b/newsfragments/3921.feature @@ -0,0 +1,5 @@ +Automatically exit when stdin is closed + +This facilitates subprocess management, specifically cleanup. +When a parent process is running tahoe and exits without time to do "proper" cleanup at least the stdin descriptor will be closed. +Subsequently "tahoe run" notices this and exits. \ No newline at end of file From 768829e993d957e6d4a78134fcd16cb5d2e92295 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Sep 2022 21:22:45 -0600 Subject: [PATCH 03/60] more robust --- src/allmydata/test/test_runner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index fdd31c37d..8424bec6a 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -646,7 +646,8 @@ class OnStdinCloseTests(SyncTestCase): on_stdin_close(reactor, onclose) self.assertEqual(called, []) - reader = list(reactor.readers)[0] + for reader in reactor.getReaders(): + reader.loseConnection() reader.loseConnection() reactor.advance(1) # ProcessReader does a callLater(0, ..) 
@@ -665,7 +666,8 @@ class OnStdinCloseTests(SyncTestCase): on_stdin_close(reactor, onclose) self.assertEqual(called, []) - reader = list(reactor.readers)[0] + for reader in reactor.getReaders(): + reader.loseConnection() reader.loseConnection() reactor.advance(1) # ProcessReader does a callLater(0, ..) From 00c785ec7697167cbe8c37b9f50bd9be95690395 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Sep 2022 21:47:28 -0600 Subject: [PATCH 04/60] debug windows --- src/allmydata/test/test_runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 8424bec6a..74d7ac59f 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -646,6 +646,8 @@ class OnStdinCloseTests(SyncTestCase): on_stdin_close(reactor, onclose) self.assertEqual(called, []) + print("READERS", reactor.getReaders()) + for reader in reactor.getReaders(): reader.loseConnection() reader.loseConnection() From decb36a8f6f4d755b51c6f2b2e624d77dcf31899 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Sep 2022 22:20:07 -0600 Subject: [PATCH 05/60] refactor for Windows testing --- src/allmydata/scripts/tahoe_run.py | 6 +++--- src/allmydata/test/test_runner.py | 27 ++++++++++++++++----------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 68578a2a1..63dc351b1 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -241,12 +241,12 @@ def on_stdin_close(reactor, fn): when_closed_d.addBoth(on_close) # we don't need to do anything with this instance because it gets - # hooked into the reactor and thus remembered - StandardIO( + # hooked into the reactor and thus remembered .. but we return it + # for Windows testing purposes. + return StandardIO( proto=WhenClosed(), reactor=reactor, ) - return None def run(config, runApp=twistd.runApp): diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 74d7ac59f..fc3ed9618 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -643,15 +643,18 @@ class OnStdinCloseTests(SyncTestCase): def onclose(): called.append(True) - on_stdin_close(reactor, onclose) + proto = on_stdin_close(reactor, onclose) self.assertEqual(called, []) - print("READERS", reactor.getReaders()) - - for reader in reactor.getReaders(): - reader.loseConnection() - reader.loseConnection() - reactor.advance(1) # ProcessReader does a callLater(0, ..) + # one Unix we can just close all the readers, correctly + # "simulating" a stdin close .. of course, Windows has to be + # difficult + if platform.isWindows(): + proto.loseConnection() + else: + for reader in reactor.getReaders(): + reader.loseConnection() + reactor.advance(1) # ProcessReader does a callLater(0, ..) self.assertEqual(called, [True]) @@ -668,9 +671,11 @@ class OnStdinCloseTests(SyncTestCase): on_stdin_close(reactor, onclose) self.assertEqual(called, []) - for reader in reactor.getReaders(): - reader.loseConnection() - reader.loseConnection() - reactor.advance(1) # ProcessReader does a callLater(0, ..) + if platform.isWindows(): + proto.loseConnection() + else: + for reader in reactor.getReaders(): + reader.loseConnection() + reactor.advance(1) # ProcessReader does a callLater(0, ..) 
self.assertEqual(called, [True]) From 711f6d39e7281cad447fd51ae06c16e8d3247384 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Sep 2022 22:29:19 -0600 Subject: [PATCH 06/60] missing proto --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index fc3ed9618..c4bdee3fb 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -668,7 +668,7 @@ class OnStdinCloseTests(SyncTestCase): def onclose(): called.append(True) raise RuntimeError("unexpected error") - on_stdin_close(reactor, onclose) + proto = on_stdin_close(reactor, onclose) self.assertEqual(called, []) if platform.isWindows(): From 1058e50c50f47635646c13d20b264d1579cf3de4 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 8 Sep 2022 16:30:30 -0600 Subject: [PATCH 07/60] close properly --- src/allmydata/test/test_runner.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index c4bdee3fb..bce5b3c20 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -646,11 +646,12 @@ class OnStdinCloseTests(SyncTestCase): proto = on_stdin_close(reactor, onclose) self.assertEqual(called, []) - # one Unix we can just close all the readers, correctly + # on Unix we can just close all the readers, correctly # "simulating" a stdin close .. of course, Windows has to be # difficult if platform.isWindows(): - proto.loseConnection() + proto.writeConnectionLost() + proto.readConnectionLost() else: for reader in reactor.getReaders(): reader.loseConnection() @@ -672,7 +673,8 @@ class OnStdinCloseTests(SyncTestCase): self.assertEqual(called, []) if platform.isWindows(): - proto.loseConnection() + proto.writeConnectionLost() + proto.readConnectionLost() else: for reader in reactor.getReaders(): reader.loseConnection() From c296071767a9c8d62e227aae6cdd824abfc7d331 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 22 Nov 2022 14:11:58 -0500 Subject: [PATCH 08/60] News file. --- newsfragments/3939.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3939.bugfix diff --git a/newsfragments/3939.bugfix b/newsfragments/3939.bugfix new file mode 100644 index 000000000..61fb4244a --- /dev/null +++ b/newsfragments/3939.bugfix @@ -0,0 +1 @@ +Uploading immutables will now use more bandwidth, which should allow for faster uploads in many cases. \ No newline at end of file From a4787ca45ebebf3c59216366b5edf3a56f548003 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 22 Nov 2022 14:12:14 -0500 Subject: [PATCH 09/60] Batch writes much more aggressively. 
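For context, an illustrative aside rather than part of the original commit message: the comment added in the diff below notes that, assuming the writing code waits for each batched write to finish, upload throughput is capped at roughly one batch per network round trip. Checking the figures it quotes:

```
# Back-of-the-envelope arithmetic only, using the numbers quoted in the diff below.
batch_size = 1_000_000   # bytes -- the new WriteBucketProxy default
round_trip = 0.050       # seconds -- e.g. 50 ms of latency to the server
print(batch_size / round_trip)   # 20_000_000 bytes/sec, i.e. about 20 MB/s
```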
--- src/allmydata/immutable/layout.py | 69 ++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index d552d43c4..863b1cb75 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -113,13 +113,14 @@ class WriteBucketProxy(object): fieldstruct = ">L" def __init__(self, rref, server, data_size, block_size, num_segments, - num_share_hashes, uri_extension_size, pipeline_size=50000): + num_share_hashes, uri_extension_size, batch_size=1_000_000): self._rref = rref self._server = server self._data_size = data_size self._block_size = block_size self._num_segments = num_segments self._written_bytes = 0 + self._to_write = b"" effective_segments = mathutil.next_power_of_k(num_segments,2) self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE @@ -130,11 +131,13 @@ class WriteBucketProxy(object): self._create_offsets(block_size, data_size) - # k=3, max_segment_size=128KiB gives us a typical segment of 43691 - # bytes. Setting the default pipeline_size to 50KB lets us get two - # segments onto the wire but not a third, which would keep the pipe - # filled. - self._pipeline = pipeline.Pipeline(pipeline_size) + # With a ~1MB batch size, max upload speed is 1MB * round-trip latency + # assuming the writing code waits for writes to finish, so 20MB/sec if + # latency is 50ms. In the US many people only have 1MB/sec upload speed + # as of 2022 (standard Comcast). For further discussion of how one + # might set batch sizes see + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3787#comment:1. + self._batch_size = batch_size def get_allocated_size(self): return (self._offsets['uri_extension'] + self.fieldsize + @@ -179,7 +182,7 @@ class WriteBucketProxy(object): return "" % self._server.get_name() def put_header(self): - return self._write(0, self._offset_data) + return self._queue_write(0, self._offset_data) def put_block(self, segmentnum, data): offset = self._offsets['data'] + segmentnum * self._block_size @@ -193,13 +196,13 @@ class WriteBucketProxy(object): (self._block_size * (self._num_segments - 1))), len(data), self._block_size) - return self._write(offset, data) + return self._queue_write(offset, data) def put_crypttext_hashes(self, hashes): # plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and # so is not explicitly written, but we need to write everything, so # fill it in with nulls. 
- d = self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) + d = self._queue_write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size) d.addCallback(lambda _: self._really_put_crypttext_hashes(hashes)) return d @@ -212,7 +215,7 @@ class WriteBucketProxy(object): precondition(offset + len(data) <= self._offsets['block_hashes'], offset, len(data), offset+len(data), self._offsets['block_hashes']) - return self._write(offset, data) + return self._queue_write(offset, data) def put_block_hashes(self, blockhashes): offset = self._offsets['block_hashes'] @@ -223,7 +226,7 @@ class WriteBucketProxy(object): precondition(offset + len(data) <= self._offsets['share_hashes'], offset, len(data), offset+len(data), self._offsets['share_hashes']) - return self._write(offset, data) + return self._queue_write(offset, data) def put_share_hashes(self, sharehashes): # sharehashes is a list of (index, hash) tuples, so they get stored @@ -237,29 +240,45 @@ class WriteBucketProxy(object): precondition(offset + len(data) <= self._offsets['uri_extension'], offset, len(data), offset+len(data), self._offsets['uri_extension']) - return self._write(offset, data) + return self._queue_write(offset, data) def put_uri_extension(self, data): offset = self._offsets['uri_extension'] assert isinstance(data, bytes) precondition(len(data) == self._uri_extension_size) length = struct.pack(self.fieldstruct, len(data)) - return self._write(offset, length+data) + return self._queue_write(offset, length+data) - def _write(self, offset, data): - # use a Pipeline to pipeline several writes together. TODO: another - # speedup would be to coalesce small writes into a single call: this - # would reduce the foolscap CPU overhead per share, but wouldn't - # reduce the number of round trips, so it might not be worth the - # effort. - self._written_bytes += len(data) - return self._pipeline.add(len(data), - self._rref.callRemote, "write", offset, data) + def _queue_write(self, offset, data): + """ + This queues up small writes to be written in a single batched larger + write. + + Callers of this function are expected to queue the data in order, with + no holes. As such, the offset is technically unnecessary, but is used + to check the inputs. Possibly we should get rid of it. + """ + assert offset == self._written_bytes + len(self._to_write) + self._to_write += data + if len(self._to_write) >= self._batch_size: + return self._actually_write() + else: + return defer.succeed(False) + + def _actually_write(self): + """Actually write data to the server.""" + offset = self._written_bytes + data = self._to_write + self._written_bytes += len(self._to_write) + self._to_write = b"" + return self._rref.callRemote("write", offset, data) def close(self): - assert self._written_bytes == self.get_allocated_size(), f"{self._written_bytes} != {self.get_allocated_size()}" - d = self._pipeline.add(0, self._rref.callRemote, "close") - d.addCallback(lambda ign: self._pipeline.flush()) + assert self._written_bytes + len(self._to_write) == self.get_allocated_size(), ( + f"{self._written_bytes} + {len(self._to_write)} != {self.get_allocated_size()}" + ) + d = self._actually_write() + d.addCallback(lambda _: self._rref.callRemote("close")) return d def abort(self): From f638aec0af901ca763c2d5e4f034a3c45f2787bb Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 22 Nov 2022 14:22:54 -0500 Subject: [PATCH 10/60] Refactor to use BytesIO. 
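An illustrative aside, not part of the commit: the refactor below replaces the growing `bytes` buffer with an `io.BytesIO`, presumably because `self._to_write += data` copies the whole queued buffer on every small write, while `BytesIO.write()` appends in place and the queued bytes are only copied out once, at flush time. The pattern the diff adopts, in miniature:

```
from io import BytesIO

buf = BytesIO()
buf.write(b"small write one")   # cheap append
buf.write(b"small write two")
queued = len(buf.getbuffer())   # how much is queued, without copying it
data = buf.getvalue()           # single copy, taken only when flushing
buf = BytesIO()                 # start a fresh buffer after the flush
```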
--- src/allmydata/immutable/layout.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 863b1cb75..477fdf159 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -1,21 +1,14 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import struct +from io import BytesIO from zope.interface import implementer from twisted.internet import defer from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE -from allmydata.util import mathutil, observer, pipeline, log +from allmydata.util import mathutil, observer, log from allmydata.util.assertutil import precondition from allmydata.storage.server import si_b2a @@ -120,7 +113,7 @@ class WriteBucketProxy(object): self._block_size = block_size self._num_segments = num_segments self._written_bytes = 0 - self._to_write = b"" + self._to_write = BytesIO() effective_segments = mathutil.next_power_of_k(num_segments,2) self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE @@ -258,9 +251,10 @@ class WriteBucketProxy(object): no holes. As such, the offset is technically unnecessary, but is used to check the inputs. Possibly we should get rid of it. """ - assert offset == self._written_bytes + len(self._to_write) - self._to_write += data - if len(self._to_write) >= self._batch_size: + queued_size = len(self._to_write.getbuffer()) + assert offset == self._written_bytes + queued_size + self._to_write.write(data) + if queued_size + len(data) >= self._batch_size: return self._actually_write() else: return defer.succeed(False) @@ -268,14 +262,14 @@ class WriteBucketProxy(object): def _actually_write(self): """Actually write data to the server.""" offset = self._written_bytes - data = self._to_write - self._written_bytes += len(self._to_write) - self._to_write = b"" + data = self._to_write.getvalue() + self._written_bytes += len(data) + self._to_write = BytesIO() return self._rref.callRemote("write", offset, data) def close(self): - assert self._written_bytes + len(self._to_write) == self.get_allocated_size(), ( - f"{self._written_bytes} + {len(self._to_write)} != {self.get_allocated_size()}" + assert self._written_bytes + len(self._to_write.getbuffer()) == self.get_allocated_size(), ( + f"{self._written_bytes} + {len(self._to_write.getbuffer())} != {self.get_allocated_size()}" ) d = self._actually_write() d.addCallback(lambda _: self._rref.callRemote("close")) From d86d578034030975593341e8730d2e9e74f2ba6c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 22 Nov 2022 15:17:56 -0500 Subject: [PATCH 11/60] Refactor to make core data structure easier to test in isolation. --- src/allmydata/immutable/layout.py | 77 ++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 477fdf159..b9eb74d8f 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -2,8 +2,12 @@ Ported to Python 3. 
""" +from __future__ import annotations + import struct from io import BytesIO + +from attrs import define, field from zope.interface import implementer from twisted.internet import defer from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ @@ -100,6 +104,43 @@ def make_write_bucket_proxy(rref, server, num_share_hashes, uri_extension_size) return wbp + +@define +class _WriteBuffer: + """ + Queue up small writes to be written in a single batched larger write. + """ + _batch_size: int + _to_write : BytesIO = field(factory=BytesIO) + _written_bytes : int = field(default=0) + + def queue_write(self, offset: int, data: bytes) -> bool: + """ + Queue a write. If the result is ``False``, no further action is needed + for now. If the result is some ``True``, it's time to call ``flush()`` + and do a real write. + + Callers of this function are expected to queue the data in order, with + no holes. As such, the offset is technically unnecessary, but is used + to check the inputs. Possibly we should get rid of it. + """ + assert offset == self.get_total_bytes_queued() + self._to_write.write(data) + return len(self._to_write.getbuffer()) >= self._batch_size + + def flush(self) -> tuple[int, bytes]: + """Return offset and data to be written.""" + offset = self._written_bytes + data = self._to_write.getvalue() + self._written_bytes += len(data) + self._to_write = BytesIO() + return (offset, data) + + def get_total_bytes_queued(self) -> int: + """Return how many bytes were written or queued in total.""" + return self._written_bytes + len(self._to_write.getbuffer()) + + @implementer(IStorageBucketWriter) class WriteBucketProxy(object): fieldsize = 4 @@ -112,8 +153,6 @@ class WriteBucketProxy(object): self._data_size = data_size self._block_size = block_size self._num_segments = num_segments - self._written_bytes = 0 - self._to_write = BytesIO() effective_segments = mathutil.next_power_of_k(num_segments,2) self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE @@ -130,7 +169,7 @@ class WriteBucketProxy(object): # as of 2022 (standard Comcast). For further discussion of how one # might set batch sizes see # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3787#comment:1. - self._batch_size = batch_size + self._write_buffer = _WriteBuffer(batch_size) def get_allocated_size(self): return (self._offsets['uri_extension'] + self.fieldsize + @@ -251,25 +290,19 @@ class WriteBucketProxy(object): no holes. As such, the offset is technically unnecessary, but is used to check the inputs. Possibly we should get rid of it. 
""" - queued_size = len(self._to_write.getbuffer()) - assert offset == self._written_bytes + queued_size - self._to_write.write(data) - if queued_size + len(data) >= self._batch_size: + if self._write_buffer.queue_write(offset, data): return self._actually_write() else: return defer.succeed(False) def _actually_write(self): - """Actually write data to the server.""" - offset = self._written_bytes - data = self._to_write.getvalue() - self._written_bytes += len(data) - self._to_write = BytesIO() + """Write data to the server.""" + offset, data = self._write_buffer.flush() return self._rref.callRemote("write", offset, data) def close(self): - assert self._written_bytes + len(self._to_write.getbuffer()) == self.get_allocated_size(), ( - f"{self._written_bytes} + {len(self._to_write.getbuffer())} != {self.get_allocated_size()}" + assert self._write_buffer.get_total_bytes_queued() == self.get_allocated_size(), ( + f"{self._written_buffer.get_total_bytes_queued()} != {self.get_allocated_size()}" ) d = self._actually_write() d.addCallback(lambda _: self._rref.callRemote("close")) @@ -384,16 +417,16 @@ class ReadBucketProxy(object): self._fieldsize = fieldsize self._fieldstruct = fieldstruct - for field in ( 'data', - 'plaintext_hash_tree', # UNUSED - 'crypttext_hash_tree', - 'block_hashes', - 'share_hashes', - 'uri_extension', - ): + for field_name in ( 'data', + 'plaintext_hash_tree', # UNUSED + 'crypttext_hash_tree', + 'block_hashes', + 'share_hashes', + 'uri_extension', + ): offset = struct.unpack(fieldstruct, data[x:x+fieldsize])[0] x += fieldsize - self._offsets[field] = offset + self._offsets[field_name] = offset return self._offsets def _get_block_data(self, unused, blocknum, blocksize, thisblocksize): From d1deda5fdd00f0a9b03bda9a0c1be7e494c90089 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 10:09:53 -0500 Subject: [PATCH 12/60] Unit tests for _WriteBuffer. --- src/allmydata/test/test_storage.py | 47 +++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 134609f81..f5762f616 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -3,14 +3,9 @@ Tests for allmydata.storage. Ported to Python 3. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import native_str, PY2, bytes_to_native_str, bchr -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations +from future.utils import native_str, bytes_to_native_str, bchr from six import ensure_str from io import ( @@ -59,7 +54,7 @@ from allmydata.storage.common import storage_index_to_dir, \ si_b2a, si_a2b from allmydata.storage.lease import LeaseInfo from allmydata.immutable.layout import WriteBucketProxy, WriteBucketProxy_v2, \ - ReadBucketProxy + ReadBucketProxy, _WriteBuffer from allmydata.mutable.layout import MDMFSlotWriteProxy, MDMFSlotReadProxy, \ LayoutInvalid, MDMFSIGNABLEHEADER, \ SIGNED_PREFIX, MDMFHEADER, \ @@ -3746,3 +3741,39 @@ class LeaseInfoTests(SyncTestCase): info.to_mutable_data(), HasLength(info.mutable_size()), ) + + +class WriteBufferTests(SyncTestCase): + """Tests for ``_WriteBuffer``.""" + + @given( + small_writes=strategies.lists( + strategies.binary(min_size=1, max_size=20), + min_size=10, max_size=20), + batch_size=strategies.integers(min_value=5, max_value=10) + ) + def test_write_buffer(self, small_writes: list[bytes], batch_size: int): + """ + ``_WriteBuffer`` coalesces small writes into bigger writes based on + the batch size. + """ + offset = 0 + wb = _WriteBuffer(batch_size) + result = b"" + for data in small_writes: + should_flush = wb.queue_write(offset, data) + offset += len(data) + if should_flush: + flushed_offset, flushed_data = wb.flush() + self.assertEqual(flushed_offset, len(result)) + # The flushed data is in batch sizes, or closest approximation + # given queued inputs: + self.assertTrue(batch_size <= len(flushed_data) < batch_size + len(data)) + result += flushed_data + + # Final flush: + flushed_offset, flushed_data = wb.flush() + self.assertEqual(flushed_offset, len(result)) + result += flushed_data + + self.assertEqual(result, b"".join(small_writes)) From fd9e50adf1efcb0878a7dcc14bc0ac1d3a3c620c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 10:13:18 -0500 Subject: [PATCH 13/60] Simplify _WriteBuffer slightly. --- src/allmydata/immutable/layout.py | 10 +++------- src/allmydata/test/test_storage.py | 4 +--- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index b9eb74d8f..cb41b0594 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -114,17 +114,12 @@ class _WriteBuffer: _to_write : BytesIO = field(factory=BytesIO) _written_bytes : int = field(default=0) - def queue_write(self, offset: int, data: bytes) -> bool: + def queue_write(self, data: bytes) -> bool: """ Queue a write. If the result is ``False``, no further action is needed for now. If the result is some ``True``, it's time to call ``flush()`` and do a real write. - - Callers of this function are expected to queue the data in order, with - no holes. As such, the offset is technically unnecessary, but is used - to check the inputs. Possibly we should get rid of it. """ - assert offset == self.get_total_bytes_queued() self._to_write.write(data) return len(self._to_write.getbuffer()) >= self._batch_size @@ -290,7 +285,8 @@ class WriteBucketProxy(object): no holes. 
As such, the offset is technically unnecessary, but is used to check the inputs. Possibly we should get rid of it. """ - if self._write_buffer.queue_write(offset, data): + assert offset == self._write_buffer.get_total_bytes_queued() + if self._write_buffer.queue_write(data): return self._actually_write() else: return defer.succeed(False) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index f5762f616..820d4fd79 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -3757,12 +3757,10 @@ class WriteBufferTests(SyncTestCase): ``_WriteBuffer`` coalesces small writes into bigger writes based on the batch size. """ - offset = 0 wb = _WriteBuffer(batch_size) result = b"" for data in small_writes: - should_flush = wb.queue_write(offset, data) - offset += len(data) + should_flush = wb.queue_write(data) if should_flush: flushed_offset, flushed_data = wb.flush() self.assertEqual(flushed_offset, len(result)) From 37902802646d873e0cbcd4b508c59406d5e3967e Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 10:15:19 -0500 Subject: [PATCH 14/60] Documentation. --- src/allmydata/immutable/encode.py | 2 ++ src/allmydata/immutable/layout.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 874492785..2b6602773 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -262,6 +262,8 @@ class Encoder(object): d.addCallback(lambda res: self.finish_hashing()) + # These calls have to happen in order, and waiting for previous one + # also ensures backpressure: d.addCallback(lambda res: self.send_crypttext_hash_tree_to_all_shareholders()) d.addCallback(lambda res: self.send_all_block_hash_trees()) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index cb41b0594..562ca4470 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -138,6 +138,10 @@ class _WriteBuffer: @implementer(IStorageBucketWriter) class WriteBucketProxy(object): + """ + Note: The various ``put_`` methods need to be called in the order in which the + bytes will get written. + """ fieldsize = 4 fieldstruct = ">L" From 41533f162e4d78b2ece85a09e1cc9ecf79810f76 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 23 Nov 2022 10:20:32 -0500 Subject: [PATCH 15/60] Not used anymore. --- src/allmydata/test/test_pipeline.py | 198 ---------------------------- src/allmydata/util/pipeline.py | 149 --------------------- 2 files changed, 347 deletions(-) delete mode 100644 src/allmydata/test/test_pipeline.py delete mode 100644 src/allmydata/util/pipeline.py diff --git a/src/allmydata/test/test_pipeline.py b/src/allmydata/test/test_pipeline.py deleted file mode 100644 index 31d952836..000000000 --- a/src/allmydata/test/test_pipeline.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Tests for allmydata.util.pipeline. - -Ported to Python 3. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -import gc - -from twisted.internet import defer -from twisted.trial import unittest -from twisted.python import log -from twisted.python.failure import Failure - -from allmydata.util import pipeline - - -class Pipeline(unittest.TestCase): - def pause(self, *args, **kwargs): - d = defer.Deferred() - self.calls.append( (d, args, kwargs) ) - return d - - def failUnlessCallsAre(self, expected): - #print(self.calls) - #print(expected) - self.failUnlessEqual(len(self.calls), len(expected), self.calls) - for i,c in enumerate(self.calls): - self.failUnlessEqual(c[1:], expected[i], str(i)) - - def test_basic(self): - self.calls = [] - finished = [] - p = pipeline.Pipeline(100) - - d = p.flush() # fires immediately - d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 1) - finished = [] - - d = p.add(10, self.pause, "one") - # the call should start right away, and our return Deferred should - # fire right away - d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 1) - self.failUnlessEqual(finished[0], None) - self.failUnlessCallsAre([ ( ("one",) , {} ) ]) - self.failUnlessEqual(p.gauge, 10) - - # pipeline: [one] - - finished = [] - d = p.add(20, self.pause, "two", kw=2) - # pipeline: [one, two] - - # the call and the Deferred should fire right away - d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 1) - self.failUnlessEqual(finished[0], None) - self.failUnlessCallsAre([ ( ("one",) , {} ), - ( ("two",) , {"kw": 2} ), - ]) - self.failUnlessEqual(p.gauge, 30) - - self.calls[0][0].callback("one-result") - # pipeline: [two] - self.failUnlessEqual(p.gauge, 20) - - finished = [] - d = p.add(90, self.pause, "three", "posarg1") - # pipeline: [two, three] - flushed = [] - fd = p.flush() - fd.addCallbacks(flushed.append, log.err) - self.failUnlessEqual(flushed, []) - - # the call will be made right away, but the return Deferred will not, - # because the pipeline is now full. 
- d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 0) - self.failUnlessCallsAre([ ( ("one",) , {} ), - ( ("two",) , {"kw": 2} ), - ( ("three", "posarg1"), {} ), - ]) - self.failUnlessEqual(p.gauge, 110) - - self.failUnlessRaises(pipeline.SingleFileError, p.add, 10, self.pause) - - # retiring either call will unblock the pipeline, causing the #3 - # Deferred to fire - self.calls[2][0].callback("three-result") - # pipeline: [two] - - self.failUnlessEqual(len(finished), 1) - self.failUnlessEqual(finished[0], None) - self.failUnlessEqual(flushed, []) - - # retiring call#2 will finally allow the flush() Deferred to fire - self.calls[1][0].callback("two-result") - self.failUnlessEqual(len(flushed), 1) - - def test_errors(self): - self.calls = [] - p = pipeline.Pipeline(100) - - d1 = p.add(200, self.pause, "one") - d2 = p.flush() - - finished = [] - d1.addBoth(finished.append) - self.failUnlessEqual(finished, []) - - flushed = [] - d2.addBoth(flushed.append) - self.failUnlessEqual(flushed, []) - - self.calls[0][0].errback(ValueError("oops")) - - self.failUnlessEqual(len(finished), 1) - f = finished[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - self.failUnlessIn("PipelineError", str(f.value)) - self.failUnlessIn("ValueError", str(f.value)) - r = repr(f.value) - self.failUnless("ValueError" in r, r) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - self.failUnlessEqual(len(flushed), 1) - f = flushed[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - # now that the pipeline is in the failed state, any new calls will - # fail immediately - - d3 = p.add(20, self.pause, "two") - - finished = [] - d3.addBoth(finished.append) - self.failUnlessEqual(len(finished), 1) - f = finished[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - r = repr(f.value) - self.failUnless("ValueError" in r, r) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - d4 = p.flush() - flushed = [] - d4.addBoth(flushed.append) - self.failUnlessEqual(len(flushed), 1) - f = flushed[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - def test_errors2(self): - self.calls = [] - p = pipeline.Pipeline(100) - - d1 = p.add(10, self.pause, "one") - d2 = p.add(20, self.pause, "two") - d3 = p.add(30, self.pause, "three") - d4 = p.flush() - - # one call fails, then the second one succeeds: make sure - # ExpandableDeferredList tolerates the second one - - flushed = [] - d4.addBoth(flushed.append) - self.failUnlessEqual(flushed, []) - - self.calls[0][0].errback(ValueError("oops")) - self.failUnlessEqual(len(flushed), 1) - f = flushed[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - self.calls[1][0].callback("two-result") - self.calls[2][0].errback(ValueError("three-error")) - - del d1,d2,d3,d4 - gc.collect() # for PyPy diff --git a/src/allmydata/util/pipeline.py b/src/allmydata/util/pipeline.py deleted file mode 100644 index 31f5d5d49..000000000 --- a/src/allmydata/util/pipeline.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -A pipeline of Deferreds. - -Ported to Python 3. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -from twisted.internet import defer -from twisted.python.failure import Failure -from twisted.python import log -from allmydata.util.assertutil import precondition - - -class PipelineError(Exception): - """One of the pipelined messages returned an error. The received Failure - object is stored in my .error attribute.""" - def __init__(self, error): - self.error = error - - def __repr__(self): - return "" % (self.error,) - def __str__(self): - return "" % (self.error,) - -class SingleFileError(Exception): - """You are not permitted to add a job to a full pipeline.""" - - -class ExpandableDeferredList(defer.Deferred, object): - # like DeferredList(fireOnOneErrback=True) with a built-in - # gatherResults(), but you can add new Deferreds until you close it. This - # gives you a chance to add don't-complain-about-unhandled-error errbacks - # immediately after attachment, regardless of whether you actually end up - # wanting the list or not. - def __init__(self): - defer.Deferred.__init__(self) - self.resultsReceived = 0 - self.resultList = [] - self.failure = None - self.closed = False - - def addDeferred(self, d): - precondition(not self.closed, "don't call addDeferred() on a closed ExpandableDeferredList") - index = len(self.resultList) - self.resultList.append(None) - d.addCallbacks(self._cbDeferred, self._ebDeferred, - callbackArgs=(index,)) - return d - - def close(self): - self.closed = True - self.checkForFinished() - - def checkForFinished(self): - if not self.closed: - return - if self.called: - return - if self.failure: - self.errback(self.failure) - elif self.resultsReceived == len(self.resultList): - self.callback(self.resultList) - - def _cbDeferred(self, res, index): - self.resultList[index] = res - self.resultsReceived += 1 - self.checkForFinished() - return res - - def _ebDeferred(self, f): - self.failure = f - self.checkForFinished() - return f - - -class Pipeline(object): - """I manage a size-limited pipeline of Deferred operations, usually - callRemote() messages.""" - - def __init__(self, capacity): - self.capacity = capacity # how full we can be - self.gauge = 0 # how full we are - self.failure = None - self.waiting = [] # callers of add() who are blocked - self.unflushed = ExpandableDeferredList() - - def add(self, _size, _func, *args, **kwargs): - # We promise that all the Deferreds we return will fire in the order - # they were returned. To make it easier to keep this promise, we - # prohibit multiple outstanding calls to add() . 
- if self.waiting: - raise SingleFileError - if self.failure: - return defer.fail(self.failure) - self.gauge += _size - fd = defer.maybeDeferred(_func, *args, **kwargs) - fd.addBoth(self._call_finished, _size) - self.unflushed.addDeferred(fd) - fd.addErrback(self._eat_pipeline_errors) - fd.addErrback(log.err, "_eat_pipeline_errors didn't eat it") - if self.gauge < self.capacity: - return defer.succeed(None) - d = defer.Deferred() - self.waiting.append(d) - return d - - def flush(self): - if self.failure: - return defer.fail(self.failure) - d, self.unflushed = self.unflushed, ExpandableDeferredList() - d.close() - d.addErrback(self._flushed_error) - return d - - def _flushed_error(self, f): - precondition(self.failure) # should have been set by _call_finished - return self.failure - - def _call_finished(self, res, size): - self.gauge -= size - if isinstance(res, Failure): - res = Failure(PipelineError(res)) - if not self.failure: - self.failure = res - if self.failure: - while self.waiting: - d = self.waiting.pop(0) - d.errback(self.failure) - else: - while self.waiting and (self.gauge < self.capacity): - d = self.waiting.pop(0) - d.callback(None) - # the d.callback() might trigger a new call to add(), which - # will raise our gauge and might cause the pipeline to be - # filled. So the while() loop gets a chance to tell the - # caller to stop. - return res - - def _eat_pipeline_errors(self, f): - f.trap(PipelineError) - return None From 562111012e4418707ec141c8f488d36ea61325ae Mon Sep 17 00:00:00 2001 From: Sajith Sasidharan Date: Sat, 26 Nov 2022 18:18:05 -0600 Subject: [PATCH 16/60] Give GITHUB_TOKEN just enough permissions to run the workflow --- .github/workflows/ci.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0327014ca..588e71747 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,16 @@ on: - "master" pull_request: +# At the start of each workflow run, GitHub creates a unique +# GITHUB_TOKEN secret to use in the workflow. It is a good idea for +# this GITHUB_TOKEN to have the minimum of permissions. See: +# +# - https://docs.github.com/en/actions/security-guides/automatic-token-authentication +# - https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#permissions +# +permissions: + contents: read + # Control to what degree jobs in this workflow will run concurrently with # other instances of themselves. 
# From 9bd384ac2db0199c446ebcffefffb01cccf1e2de Mon Sep 17 00:00:00 2001 From: Sajith Sasidharan Date: Sat, 26 Nov 2022 18:18:44 -0600 Subject: [PATCH 17/60] Add news fragment --- newsfragments/3944.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3944.minor diff --git a/newsfragments/3944.minor b/newsfragments/3944.minor new file mode 100644 index 000000000..e69de29bb From 5e6189e1159432e30b55340a9230d1ea317971ce Mon Sep 17 00:00:00 2001 From: Sajith Sasidharan Date: Sat, 26 Nov 2022 18:25:19 -0600 Subject: [PATCH 18/60] Use newer version of actions/setup-python --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 588e71747..bd757fe08 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -73,7 +73,7 @@ jobs: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -208,7 +208,7 @@ jobs: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -268,7 +268,7 @@ jobs: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} From 23d8d1cb01682a13ad788bdf832513c1cddc63ed Mon Sep 17 00:00:00 2001 From: Sajith Sasidharan Date: Sat, 26 Nov 2022 18:28:57 -0600 Subject: [PATCH 19/60] Use action/setup-python@v4's caching feature --- .github/workflows/ci.yml | 48 +++------------------------------------- 1 file changed, 3 insertions(+), 45 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd757fe08..6c608e888 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -76,25 +76,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - # To use pip caching with GitHub Actions in an OS-independent - # manner, we need `pip cache dir` command, which became - # available since pip v20.1+. At the time of writing this, - # GitHub Actions offers pip v20.3.3 for both ubuntu-latest and - # windows-latest, and pip v20.3.1 for macos-latest. 
- - name: Get pip cache directory - id: pip-cache - run: | - echo "::set-output name=dir::$(pip cache dir)" - - # See https://github.com/actions/cache - - name: Use pip cache - uses: actions/cache@v2 - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: 'pip' # caching pip dependencies - name: Install Python packages run: | @@ -211,19 +193,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - - name: Get pip cache directory - id: pip-cache - run: | - echo "::set-output name=dir::$(pip cache dir)" - - - name: Use pip cache - uses: actions/cache@v2 - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: 'pip' # caching pip dependencies - name: Install Python packages run: | @@ -271,19 +241,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - - name: Get pip cache directory - id: pip-cache - run: | - echo "::set-output name=dir::$(pip cache dir)" - - - name: Use pip cache - uses: actions/cache@v2 - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: 'pip' # caching pip dependencies - name: Install Python packages run: | From 15881da348dfa9c9f92836f59175e3582fdab8cb Mon Sep 17 00:00:00 2001 From: Sajith Sasidharan Date: Sat, 26 Nov 2022 18:37:46 -0600 Subject: [PATCH 20/60] Use newer version of actions/checkout --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6c608e888..4447e539c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,7 +68,7 @@ jobs: # See https://github.com/actions/checkout. A fetch-depth of 0 # fetches all tags and branches. 
- name: Check out Tahoe-LAFS sources - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 @@ -185,7 +185,7 @@ jobs: args: install tor - name: Check out Tahoe-LAFS sources - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 @@ -233,7 +233,7 @@ jobs: steps: - name: Check out Tahoe-LAFS sources - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 From 26d30979c0fc3345c78846aaf37db1a7f83610eb Mon Sep 17 00:00:00 2001 From: Sajith Sasidharan Date: Sat, 26 Nov 2022 18:38:48 -0600 Subject: [PATCH 21/60] Use newer version of actions/upload-artifact --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4447e539c..64a60bd04 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,13 +90,13 @@ jobs: run: python -m tox - name: Upload eliot.log - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 with: name: eliot.log path: eliot.log - name: Upload trial log - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 with: name: test.log path: _trial_temp/test.log @@ -212,7 +212,7 @@ jobs: run: tox -e integration - name: Upload eliot.log in case of failure - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v3 if: failure() with: name: integration.eliot.json @@ -259,7 +259,7 @@ jobs: run: dist/Tahoe-LAFS/tahoe --version - name: Upload PyInstaller package - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Tahoe-LAFS-${{ matrix.os }}-Python-${{ matrix.python-version }} path: dist/Tahoe-LAFS-*-*.* From 7715972429c34d4c6a684f184ab5f4ba1613df16 Mon Sep 17 00:00:00 2001 From: Sajith Sasidharan Date: Sat, 26 Nov 2022 18:40:19 -0600 Subject: [PATCH 22/60] Use newer version of crazy-max/ghaction-chocolatey --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 64a60bd04..169e981ed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -180,7 +180,7 @@ jobs: - name: Install Tor [Windows] if: matrix.os == 'windows-latest' - uses: crazy-max/ghaction-chocolatey@v1 + uses: crazy-max/ghaction-chocolatey@v2 with: args: install tor From d7fe25f7c7ffc1e8eb9da45755085c5dd04c25d8 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 29 Nov 2022 10:49:20 -0500 Subject: [PATCH 23/60] Correct the assertion about how "not found" should be handled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Behavior verified visually against a live client node: ``` ❯ curl -v 'http://localhost:3456/uri/URI:CHK:cmtcxq7hwxvfxan34yiev6ivhy:qvcekmjtoetdcw4kmi7b3rtblvgx7544crnwaqtiewemdliqsokq:1:1:1' * Trying 127.0.0.1:3456... * Connected to localhost (127.0.0.1) port 3456 (#0) > GET /uri/URI:CHK:cmtcxq7hwxvfxan34yiev6ivhy:qvcekmjtoetdcw4kmi7b3rtblvgx7544crnwaqtiewemdliqsokq:1:1:1 HTTP/1.1 > Host: localhost:3456 > User-Agent: curl/7.83.1 > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 410 Gone < X-Frame-Options: DENY < Referrer-Policy: no-referrer < Server: TwistedWeb/22.10.0 < Date: Tue, 29 Nov 2022 15:39:47 GMT < Content-Type: text/plain;charset=utf-8 < Accept-Ranges: bytes < Content-Length: 294 < ETag: ui2tnwl5lltj5clzpyff42jdce- < NoSharesError: no shares could be found. 
Zero shares usually indicates a corrupt URI, or that no servers were connected, but it might also indicate severe corruption. You should perform a filecheck on this object to learn more. The full error message is: * Connection #0 to host localhost left intact no shares (need 1). Last failure: None ``` --- src/allmydata/test/test_testing.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/allmydata/test/test_testing.py b/src/allmydata/test/test_testing.py index 3715d1aca..07bebb7a1 100644 --- a/src/allmydata/test/test_testing.py +++ b/src/allmydata/test/test_testing.py @@ -9,18 +9,7 @@ """ Tests for the allmydata.testing helpers - -Ported to Python 3. - """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from twisted.internet.defer import ( inlineCallbacks, @@ -56,10 +45,12 @@ from testtools.matchers import ( IsInstance, MatchesStructure, AfterPreprocessing, + Contains, ) from testtools.twistedsupport import ( succeeded, ) +from twisted.web.http import GONE class FakeWebTest(SyncTestCase): @@ -144,7 +135,8 @@ class FakeWebTest(SyncTestCase): def test_download_missing(self): """ - Error if we download a capability that doesn't exist + The response to a request to download a capability that doesn't exist + is 410 (GONE). """ http_client = create_tahoe_treq_client() @@ -157,7 +149,11 @@ class FakeWebTest(SyncTestCase): resp, succeeded( MatchesStructure( - code=Equals(500) + code=Equals(GONE), + content=AfterPreprocessing( + lambda m: m(), + succeeded(Contains(b"No data for")), + ), ) ) ) From 02aeb68f1731c58e146ccefd1d8ea99ad364b73d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 29 Nov 2022 10:51:07 -0500 Subject: [PATCH 24/60] Take care with str vs bytes in the implementation Also replace the intentional BAD_REQUEST with GONE for this case. --- src/allmydata/testing/web.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/allmydata/testing/web.py b/src/allmydata/testing/web.py index bb858b555..6538dc3a4 100644 --- a/src/allmydata/testing/web.py +++ b/src/allmydata/testing/web.py @@ -6,22 +6,13 @@ # This file is part of Tahoe-LAFS. # # See the docs/about.rst file for licensing information. -"""Test-helpers for clients that use the WebUI. - -Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - +Test-helpers for clients that use the WebUI. 
+""" import hashlib +from typing import Dict import attr @@ -147,7 +138,7 @@ class _FakeTahoeUriHandler(Resource, object): isLeaf = True - data = attr.ib(default=attr.Factory(dict)) + data: Dict[bytes, bytes] = attr.ib(default=attr.Factory(dict)) capability_generators = attr.ib(default=attr.Factory(dict)) def _generate_capability(self, kind): @@ -209,7 +200,7 @@ class _FakeTahoeUriHandler(Resource, object): capability = None for arg, value in uri.query: if arg == u"uri": - capability = value + capability = value.encode("utf-8") # it's legal to use the form "/uri/" if capability is None and request.postpath and request.postpath[0]: capability = request.postpath[0] @@ -221,10 +212,9 @@ class _FakeTahoeUriHandler(Resource, object): # the user gave us a capability; if our Grid doesn't have any # data for it, that's an error. - capability = capability.encode('ascii') if capability not in self.data: - request.setResponseCode(http.BAD_REQUEST) - return u"No data for '{}'".format(capability.decode('ascii')) + request.setResponseCode(http.GONE) + return u"No data for '{}'".format(capability.decode('ascii')).encode("utf-8") return self.data[capability] From 6c0e5f5807cf72d38b9fb4a9461d0fd085920360 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 29 Nov 2022 10:52:02 -0500 Subject: [PATCH 25/60] news fragment --- newsfragments/3874.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3874.minor diff --git a/newsfragments/3874.minor b/newsfragments/3874.minor new file mode 100644 index 000000000..e69de29bb From 4367e5a0fcfd5c905195b741eec727eb2416096d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 30 Nov 2022 09:28:58 -0500 Subject: [PATCH 26/60] Bump the Twisted dependency so we can do this --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 768e44e29..a3b3d5b98 100644 --- a/setup.py +++ b/setup.py @@ -96,7 +96,9 @@ install_requires = [ # an sftp extra in Tahoe-LAFS, there is no point in having one. # * Twisted 19.10 introduces Site.getContentFile which we use to get # temporary upload files placed into a per-node temporary directory. - "Twisted[tls,conch] >= 19.10.0", + # * Twisted 22.8.0 added support for coroutine-returning functions in many + # places (mainly via `maybeDeferred`) + "Twisted[tls,conch] >= 22.8.0", "PyYAML >= 3.11", From 5cebe91406c5d9db2c4b5ce150f85a3fd50322e7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 30 Nov 2022 09:29:57 -0500 Subject: [PATCH 27/60] update the module docstring --- src/allmydata/test/mutable/test_version.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/mutable/test_version.py b/src/allmydata/test/mutable/test_version.py index d5c44f204..aa6fb539f 100644 --- a/src/allmydata/test/mutable/test_version.py +++ b/src/allmydata/test/mutable/test_version.py @@ -1,5 +1,6 @@ """ -Ported to Python 3. +Tests related to the way ``allmydata.mutable`` handles different versions +of data for an object. 
""" from __future__ import print_function from __future__ import absolute_import From 1acf8604eff5227ed372b81eac20bc08677a853a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 30 Nov 2022 09:30:08 -0500 Subject: [PATCH 28/60] Remove the Py2/Py3 compatibility header --- src/allmydata/test/mutable/test_version.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/allmydata/test/mutable/test_version.py b/src/allmydata/test/mutable/test_version.py index aa6fb539f..669baa8db 100644 --- a/src/allmydata/test/mutable/test_version.py +++ b/src/allmydata/test/mutable/test_version.py @@ -2,17 +2,9 @@ Tests related to the way ``allmydata.mutable`` handles different versions of data for an object. """ -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from io import StringIO import os -from six.moves import cStringIO as StringIO from twisted.internet import defer from ..common import AsyncTestCase From a11eeaf240d1fde831e571ad5b5df3ebeed97168 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 30 Nov 2022 09:30:37 -0500 Subject: [PATCH 29/60] Convert all of the asynchronous functions to use `async` and `await` --- src/allmydata/test/mutable/test_version.py | 546 +++++++++------------ 1 file changed, 228 insertions(+), 318 deletions(-) diff --git a/src/allmydata/test/mutable/test_version.py b/src/allmydata/test/mutable/test_version.py index 669baa8db..d14cc9295 100644 --- a/src/allmydata/test/mutable/test_version.py +++ b/src/allmydata/test/mutable/test_version.py @@ -5,8 +5,8 @@ of data for an object. 
from io import StringIO import os +from typing import Optional -from twisted.internet import defer from ..common import AsyncTestCase from testtools.matchers import ( Equals, @@ -40,343 +40,269 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ self.small_data = b"test data" * 10 # 90 B; SDMF - def do_upload_mdmf(self, data=None): + async def do_upload_mdmf(self, data: Optional[bytes] = None) -> MutableFileNode: if data is None: data = self.data - d = self.nm.create_mutable_file(MutableData(data), - version=MDMF_VERSION) - def _then(n): - self.assertThat(n, IsInstance(MutableFileNode)) - self.assertThat(n._protocol_version, Equals(MDMF_VERSION)) - self.mdmf_node = n - return n - d.addCallback(_then) - return d + n = await self.nm.create_mutable_file(MutableData(data), + version=MDMF_VERSION) + self.assertThat(n, IsInstance(MutableFileNode)) + self.assertThat(n._protocol_version, Equals(MDMF_VERSION)) + self.mdmf_node = n + return n - def do_upload_sdmf(self, data=None): + async def do_upload_sdmf(self, data: Optional[bytes] = None) -> MutableFileNode: if data is None: data = self.small_data - d = self.nm.create_mutable_file(MutableData(data)) - def _then(n): - self.assertThat(n, IsInstance(MutableFileNode)) - self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) - self.sdmf_node = n - return n - d.addCallback(_then) - return d + n = await self.nm.create_mutable_file(MutableData(data)) + self.assertThat(n, IsInstance(MutableFileNode)) + self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) + self.sdmf_node = n + return n - def do_upload_empty_sdmf(self): - d = self.nm.create_mutable_file(MutableData(b"")) - def _then(n): - self.assertThat(n, IsInstance(MutableFileNode)) - self.sdmf_zero_length_node = n - self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) - return n - d.addCallback(_then) - return d + async def do_upload_empty_sdmf(self) -> MutableFileNode: + n = await self.nm.create_mutable_file(MutableData(b"")) + self.assertThat(n, IsInstance(MutableFileNode)) + self.sdmf_zero_length_node = n + self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) + return n - def do_upload(self): - d = self.do_upload_mdmf() - d.addCallback(lambda ign: self.do_upload_sdmf()) - return d + async def do_upload(self) -> MutableFileNode: + await self.do_upload_mdmf() + return await self.do_upload_sdmf() - def test_debug(self): - d = self.do_upload_mdmf() - def _debug(n): - fso = debug.FindSharesOptions() - storage_index = base32.b2a(n.get_storage_index()) - fso.si_s = str(storage_index, "utf-8") # command-line options are unicode on Python 3 - fso.nodedirs = [os.path.dirname(abspath_expanduser_unicode(str(storedir))) - for (i,ss,storedir) - in self.iterate_servers()] - fso.stdout = StringIO() - fso.stderr = StringIO() - debug.find_shares(fso) - sharefiles = fso.stdout.getvalue().splitlines() - expected = self.nm.default_encoding_parameters["n"] - self.assertThat(sharefiles, HasLength(expected)) + async def test_debug(self) -> None: + n = await self.do_upload_mdmf() + fso = debug.FindSharesOptions() + storage_index = base32.b2a(n.get_storage_index()) + fso.si_s = str(storage_index, "utf-8") # command-line options are unicode on Python 3 + fso.nodedirs = [os.path.dirname(abspath_expanduser_unicode(str(storedir))) + for (i,ss,storedir) + in self.iterate_servers()] + fso.stdout = StringIO() + fso.stderr = StringIO() + debug.find_shares(fso) + sharefiles = fso.stdout.getvalue().splitlines() + expected = self.nm.default_encoding_parameters["n"] + 
self.assertThat(sharefiles, HasLength(expected)) - do = debug.DumpOptions() - do["filename"] = sharefiles[0] - do.stdout = StringIO() - debug.dump_share(do) - output = do.stdout.getvalue() - lines = set(output.splitlines()) - self.assertTrue("Mutable slot found:" in lines, output) - self.assertTrue(" share_type: MDMF" in lines, output) - self.assertTrue(" num_extra_leases: 0" in lines, output) - self.assertTrue(" MDMF contents:" in lines, output) - self.assertTrue(" seqnum: 1" in lines, output) - self.assertTrue(" required_shares: 3" in lines, output) - self.assertTrue(" total_shares: 10" in lines, output) - self.assertTrue(" segsize: 131073" in lines, output) - self.assertTrue(" datalen: %d" % len(self.data) in lines, output) - vcap = str(n.get_verify_cap().to_string(), "utf-8") - self.assertTrue(" verify-cap: %s" % vcap in lines, output) - cso = debug.CatalogSharesOptions() - cso.nodedirs = fso.nodedirs - cso.stdout = StringIO() - cso.stderr = StringIO() - debug.catalog_shares(cso) - shares = cso.stdout.getvalue().splitlines() - oneshare = shares[0] # all shares should be MDMF - self.failIf(oneshare.startswith("UNKNOWN"), oneshare) - self.assertTrue(oneshare.startswith("MDMF"), oneshare) - fields = oneshare.split() - self.assertThat(fields[0], Equals("MDMF")) - self.assertThat(fields[1].encode("ascii"), Equals(storage_index)) - self.assertThat(fields[2], Equals("3/10")) - self.assertThat(fields[3], Equals("%d" % len(self.data))) - self.assertTrue(fields[4].startswith("#1:"), fields[3]) - # the rest of fields[4] is the roothash, which depends upon - # encryption salts and is not constant. fields[5] is the - # remaining time on the longest lease, which is timing dependent. - # The rest of the line is the quoted pathname to the share. - d.addCallback(_debug) - return d + do = debug.DumpOptions() + do["filename"] = sharefiles[0] + do.stdout = StringIO() + debug.dump_share(do) + output = do.stdout.getvalue() + lines = set(output.splitlines()) + self.assertTrue("Mutable slot found:" in lines, output) + self.assertTrue(" share_type: MDMF" in lines, output) + self.assertTrue(" num_extra_leases: 0" in lines, output) + self.assertTrue(" MDMF contents:" in lines, output) + self.assertTrue(" seqnum: 1" in lines, output) + self.assertTrue(" required_shares: 3" in lines, output) + self.assertTrue(" total_shares: 10" in lines, output) + self.assertTrue(" segsize: 131073" in lines, output) + self.assertTrue(" datalen: %d" % len(self.data) in lines, output) + vcap = str(n.get_verify_cap().to_string(), "utf-8") + self.assertTrue(" verify-cap: %s" % vcap in lines, output) + cso = debug.CatalogSharesOptions() + cso.nodedirs = fso.nodedirs + cso.stdout = StringIO() + cso.stderr = StringIO() + debug.catalog_shares(cso) + shares = cso.stdout.getvalue().splitlines() + oneshare = shares[0] # all shares should be MDMF + self.failIf(oneshare.startswith("UNKNOWN"), oneshare) + self.assertTrue(oneshare.startswith("MDMF"), oneshare) + fields = oneshare.split() + self.assertThat(fields[0], Equals("MDMF")) + self.assertThat(fields[1].encode("ascii"), Equals(storage_index)) + self.assertThat(fields[2], Equals("3/10")) + self.assertThat(fields[3], Equals("%d" % len(self.data))) + self.assertTrue(fields[4].startswith("#1:"), fields[3]) + # the rest of fields[4] is the roothash, which depends upon + # encryption salts and is not constant. fields[5] is the + # remaining time on the longest lease, which is timing dependent. + # The rest of the line is the quoted pathname to the share. 
+ + async def test_get_sequence_number(self) -> None: + await self.do_upload() + bv = await self.mdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(1)) + bv = await self.sdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(1)) - def test_get_sequence_number(self): - d = self.do_upload() - d.addCallback(lambda ign: self.mdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(1))) - d.addCallback(lambda ignored: - self.sdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(1))) # Now update. The sequence number in both cases should be 1 in # both cases. - def _do_update(ignored): - new_data = MutableData(b"foo bar baz" * 100000) - new_small_data = MutableData(b"foo bar baz" * 10) - d1 = self.mdmf_node.overwrite(new_data) - d2 = self.sdmf_node.overwrite(new_small_data) - dl = gatherResults([d1, d2]) - return dl - d.addCallback(_do_update) - d.addCallback(lambda ignored: - self.mdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(2))) - d.addCallback(lambda ignored: - self.sdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(2))) - return d + new_data = MutableData(b"foo bar baz" * 100000) + new_small_data = MutableData(b"foo bar baz" * 10) + d1 = self.mdmf_node.overwrite(new_data) + d2 = self.sdmf_node.overwrite(new_small_data) + await gatherResults([d1, d2]) + bv = await self.mdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(2)) + bv = await self.sdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(2)) - - def test_cap_after_upload(self): + async def test_cap_after_upload(self) -> None: # If we create a new mutable file and upload things to it, and # it's an MDMF file, we should get an MDMF cap back from that # file and should be able to use that. # That's essentially what MDMF node is, so just check that. 
- d = self.do_upload_mdmf() - def _then(ign): - mdmf_uri = self.mdmf_node.get_uri() - cap = uri.from_string(mdmf_uri) - self.assertTrue(isinstance(cap, uri.WriteableMDMFFileURI)) - readonly_mdmf_uri = self.mdmf_node.get_readonly_uri() - cap = uri.from_string(readonly_mdmf_uri) - self.assertTrue(isinstance(cap, uri.ReadonlyMDMFFileURI)) - d.addCallback(_then) - return d + await self.do_upload_mdmf() + mdmf_uri = self.mdmf_node.get_uri() + cap = uri.from_string(mdmf_uri) + self.assertTrue(isinstance(cap, uri.WriteableMDMFFileURI)) + readonly_mdmf_uri = self.mdmf_node.get_readonly_uri() + cap = uri.from_string(readonly_mdmf_uri) + self.assertTrue(isinstance(cap, uri.ReadonlyMDMFFileURI)) - def test_mutable_version(self): + async def test_mutable_version(self) -> None: # assert that getting parameters from the IMutableVersion object # gives us the same data as getting them from the filenode itself - d = self.do_upload() - d.addCallback(lambda ign: self.mdmf_node.get_best_mutable_version()) - def _check_mdmf(bv): - n = self.mdmf_node - self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) - self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) - self.assertFalse(bv.is_readonly()) - d.addCallback(_check_mdmf) - d.addCallback(lambda ign: self.sdmf_node.get_best_mutable_version()) - def _check_sdmf(bv): - n = self.sdmf_node - self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) - self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) - self.assertFalse(bv.is_readonly()) - d.addCallback(_check_sdmf) - return d + await self.do_upload() + bv = await self.mdmf_node.get_best_mutable_version() + n = self.mdmf_node + self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) + self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) + self.assertFalse(bv.is_readonly()) + + bv = await self.sdmf_node.get_best_mutable_version() + n = self.sdmf_node + self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) + self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) + self.assertFalse(bv.is_readonly()) - def test_get_readonly_version(self): - d = self.do_upload() - d.addCallback(lambda ign: self.mdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: self.assertTrue(bv.is_readonly())) + async def test_get_readonly_version(self) -> None: + await self.do_upload() + bv = await self.mdmf_node.get_best_readable_version() + self.assertTrue(bv.is_readonly()) # Attempting to get a mutable version of a mutable file from a # filenode initialized with a readcap should return a readonly # version of that same node. 
- d.addCallback(lambda ign: self.mdmf_node.get_readonly()) - d.addCallback(lambda ro: ro.get_best_mutable_version()) - d.addCallback(lambda v: self.assertTrue(v.is_readonly())) + ro = self.mdmf_node.get_readonly() + v = await ro.get_best_mutable_version() + self.assertTrue(v.is_readonly()) - d.addCallback(lambda ign: self.sdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: self.assertTrue(bv.is_readonly())) + bv = await self.sdmf_node.get_best_readable_version() + self.assertTrue(bv.is_readonly()) - d.addCallback(lambda ign: self.sdmf_node.get_readonly()) - d.addCallback(lambda ro: ro.get_best_mutable_version()) - d.addCallback(lambda v: self.assertTrue(v.is_readonly())) - return d + ro = self.sdmf_node.get_readonly() + v = await ro.get_best_mutable_version() + self.assertTrue(v.is_readonly()) - def test_toplevel_overwrite(self): + async def test_toplevel_overwrite(self) -> None: new_data = MutableData(b"foo bar baz" * 100000) new_small_data = MutableData(b"foo bar baz" * 10) - d = self.do_upload() - d.addCallback(lambda ign: self.mdmf_node.overwrite(new_data)) - d.addCallback(lambda ignored: - self.mdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Equals(b"foo bar baz" * 100000))) - d.addCallback(lambda ignored: - self.sdmf_node.overwrite(new_small_data)) - d.addCallback(lambda ignored: - self.sdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Equals(b"foo bar baz" * 10))) - return d + await self.do_upload() + await self.mdmf_node.overwrite(new_data) + data = await self.mdmf_node.download_best_version() + self.assertThat(data, Equals(b"foo bar baz" * 100000)) + await self.sdmf_node.overwrite(new_small_data) + data = await self.sdmf_node.download_best_version() + self.assertThat(data, Equals(b"foo bar baz" * 10)) - def test_toplevel_modify(self): - d = self.do_upload() + async def test_toplevel_modify(self) -> None: + await self.do_upload() def modifier(old_contents, servermap, first_time): return old_contents + b"modified" - d.addCallback(lambda ign: self.mdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.mdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - d.addCallback(lambda ignored: - self.sdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.sdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - return d + await self.mdmf_node.modify(modifier) + data = await self.mdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) + await self.sdmf_node.modify(modifier) + data = await self.sdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) - def test_version_modify(self): + async def test_version_modify(self) -> None: # TODO: When we can publish multiple versions, alter this test # to modify a version other than the best usable version, then # test to see that the best recoverable version is that. 
- d = self.do_upload() + await self.do_upload() def modifier(old_contents, servermap, first_time): return old_contents + b"modified" - d.addCallback(lambda ign: self.mdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.mdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - d.addCallback(lambda ignored: - self.sdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.sdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - return d + await self.mdmf_node.modify(modifier) + data = await self.mdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) + await self.sdmf_node.modify(modifier) + data = await self.sdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) - def test_download_version(self): - d = self.publish_multiple() + async def test_download_version(self) -> None: + await self.publish_multiple() # We want to have two recoverable versions on the grid. - d.addCallback(lambda res: - self._set_versions({0:0,2:0,4:0,6:0,8:0, - 1:1,3:1,5:1,7:1,9:1})) + self._set_versions({0:0,2:0,4:0,6:0,8:0, + 1:1,3:1,5:1,7:1,9:1}) # Now try to download each version. We should get the plaintext # associated with that version. - d.addCallback(lambda ignored: - self._fn.get_servermap(mode=MODE_READ)) - def _got_servermap(smap): - versions = smap.recoverable_versions() - assert len(versions) == 2 + smap = await self._fn.get_servermap(mode=MODE_READ) + versions = smap.recoverable_versions() + assert len(versions) == 2 - self.servermap = smap - self.version1, self.version2 = versions - assert self.version1 != self.version2 + self.servermap = smap + self.version1, self.version2 = versions + assert self.version1 != self.version2 - self.version1_seqnum = self.version1[0] - self.version2_seqnum = self.version2[0] - self.version1_index = self.version1_seqnum - 1 - self.version2_index = self.version2_seqnum - 1 + self.version1_seqnum = self.version1[0] + self.version2_seqnum = self.version2[0] + self.version1_index = self.version1_seqnum - 1 + self.version2_index = self.version2_seqnum - 1 - d.addCallback(_got_servermap) - d.addCallback(lambda ignored: - self._fn.download_version(self.servermap, self.version1)) - d.addCallback(lambda results: - self.assertThat(self.CONTENTS[self.version1_index], - Equals(results))) - d.addCallback(lambda ignored: - self._fn.download_version(self.servermap, self.version2)) - d.addCallback(lambda results: - self.assertThat(self.CONTENTS[self.version2_index], - Equals(results))) - return d + results = await self._fn.download_version(self.servermap, self.version1) + self.assertThat(self.CONTENTS[self.version1_index], + Equals(results)) + results = await self._fn.download_version(self.servermap, self.version2) + self.assertThat(self.CONTENTS[self.version2_index], + Equals(results)) - def test_download_nonexistent_version(self): - d = self.do_upload_mdmf() - d.addCallback(lambda ign: self.mdmf_node.get_servermap(mode=MODE_WRITE)) - def _set_servermap(servermap): - self.servermap = servermap - d.addCallback(_set_servermap) - d.addCallback(lambda ignored: - self.shouldFail(UnrecoverableFileError, "nonexistent version", - None, - self.mdmf_node.download_version, self.servermap, - "not a version")) - return d + async def test_download_nonexistent_version(self) -> None: + await self.do_upload_mdmf() + servermap = await self.mdmf_node.get_servermap(mode=MODE_WRITE) + await 
self.shouldFail(UnrecoverableFileError, "nonexistent version", + None, + self.mdmf_node.download_version, servermap, + "not a version") - def _test_partial_read(self, node, expected, modes, step): - d = node.get_best_readable_version() + async def _test_partial_read(self, node, expected, modes, step) -> None: + version = await node.get_best_readable_version() for (name, offset, length) in modes: - d.addCallback(self._do_partial_read, name, expected, offset, length) + version = await self._do_partial_read(version, name, expected, offset, length) # then read the whole thing, but only a few bytes at a time, and see # that the results are what we expect. - def _read_data(version): - c = consumer.MemoryConsumer() - d2 = defer.succeed(None) - for i in range(0, len(expected), step): - d2.addCallback(lambda ignored, i=i: version.read(c, i, step)) - d2.addCallback(lambda ignored: - self.assertThat(expected, Equals(b"".join(c.chunks)))) - return d2 - d.addCallback(_read_data) - return d - - def _do_partial_read(self, version, name, expected, offset, length): c = consumer.MemoryConsumer() - d = version.read(c, offset, length) + for i in range(0, len(expected), step): + await version.read(c, i, step) + self.assertThat(expected, Equals(b"".join(c.chunks))) + + async def _do_partial_read(self, version, name, expected, offset, length) -> None: + c = consumer.MemoryConsumer() + await version.read(c, offset, length) if length is None: expected_range = expected[offset:] else: expected_range = expected[offset:offset+length] - d.addCallback(lambda ignored: b"".join(c.chunks)) - def _check(results): - if results != expected_range: - print("read([%d]+%s) got %d bytes, not %d" % \ - (offset, length, len(results), len(expected_range))) - print("got: %s ... %s" % (results[:20], results[-20:])) - print("exp: %s ... %s" % (expected_range[:20], expected_range[-20:])) - self.fail("results[%s] != expected_range" % name) - return version # daisy-chained to next call - d.addCallback(_check) - return d + results = b"".join(c.chunks) + if results != expected_range: + print("read([%d]+%s) got %d bytes, not %d" % \ + (offset, length, len(results), len(expected_range))) + print("got: %s ... %s" % (results[:20], results[-20:])) + print("exp: %s ... 
%s" % (expected_range[:20], expected_range[-20:])) + self.fail("results[%s] != expected_range" % name) + return version # daisy-chained to next call - def test_partial_read_mdmf_0(self): + async def test_partial_read_mdmf_0(self) -> None: data = b"" - d = self.do_upload_mdmf(data=data) + result = await self.do_upload_mdmf(data=data) modes = [("all1", 0,0), ("all2", 0,None), ] - d.addCallback(self._test_partial_read, data, modes, 1) - return d + await self._test_partial_read(result, data, modes, 1) - def test_partial_read_mdmf_large(self): + async def test_partial_read_mdmf_large(self) -> None: segment_boundary = mathutil.next_multiple(128 * 1024, 3) modes = [("start_on_segment_boundary", segment_boundary, 50), ("ending_one_byte_after_segment_boundary", segment_boundary-50, 51), @@ -386,20 +312,18 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file1", 0, len(self.data)), ("complete_file2", 0, None), ] - d = self.do_upload_mdmf() - d.addCallback(self._test_partial_read, self.data, modes, 10000) - return d + result = await self.do_upload_mdmf() + await self._test_partial_read(result, self.data, modes, 10000) - def test_partial_read_sdmf_0(self): + async def test_partial_read_sdmf_0(self) -> None: data = b"" modes = [("all1", 0,0), ("all2", 0,None), ] - d = self.do_upload_sdmf(data=data) - d.addCallback(self._test_partial_read, data, modes, 1) - return d + result = await self.do_upload_sdmf(data=data) + await self._test_partial_read(result, data, modes, 1) - def test_partial_read_sdmf_2(self): + async def test_partial_read_sdmf_2(self) -> None: data = b"hi" modes = [("one_byte", 0, 1), ("last_byte", 1, 1), @@ -407,11 +331,10 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file", 0, 2), ("complete_file2", 0, None), ] - d = self.do_upload_sdmf(data=data) - d.addCallback(self._test_partial_read, data, modes, 1) - return d + result = await self.do_upload_sdmf(data=data) + await self._test_partial_read(result, data, modes, 1) - def test_partial_read_sdmf_90(self): + async def test_partial_read_sdmf_90(self) -> None: modes = [("start_at_middle", 50, 40), ("start_at_middle2", 50, None), ("zero_length_at_start", 0, 0), @@ -420,11 +343,10 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file1", 0, None), ("complete_file2", 0, 90), ] - d = self.do_upload_sdmf() - d.addCallback(self._test_partial_read, self.small_data, modes, 10) - return d + result = await self.do_upload_sdmf() + await self._test_partial_read(result, self.small_data, modes, 10) - def test_partial_read_sdmf_100(self): + async def test_partial_read_sdmf_100(self) -> None: data = b"test data "*10 modes = [("start_at_middle", 50, 50), ("start_at_middle2", 50, None), @@ -433,42 +355,30 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file1", 0, 100), ("complete_file2", 0, None), ] - d = self.do_upload_sdmf(data=data) - d.addCallback(self._test_partial_read, data, modes, 10) - return d + result = await self.do_upload_sdmf(data=data) + await self._test_partial_read(result, data, modes, 10) + async def _test_read_and_download(self, node, expected) -> None: + version = await node.get_best_readable_version() + c = consumer.MemoryConsumer() + await version.read(c) + self.assertThat(expected, Equals(b"".join(c.chunks))) - def _test_read_and_download(self, node, expected): - d = node.get_best_readable_version() - def _read_data(version): - c = consumer.MemoryConsumer() - c2 = consumer.MemoryConsumer() 
- d2 = defer.succeed(None) - d2.addCallback(lambda ignored: version.read(c)) - d2.addCallback(lambda ignored: - self.assertThat(expected, Equals(b"".join(c.chunks)))) + c2 = consumer.MemoryConsumer() + await version.read(c2, offset=0, size=len(expected)) + self.assertThat(expected, Equals(b"".join(c2.chunks))) - d2.addCallback(lambda ignored: version.read(c2, offset=0, - size=len(expected))) - d2.addCallback(lambda ignored: - self.assertThat(expected, Equals(b"".join(c2.chunks)))) - return d2 - d.addCallback(_read_data) - d.addCallback(lambda ignored: node.download_best_version()) - d.addCallback(lambda data: self.assertThat(expected, Equals(data))) - return d + data = await node.download_best_version() + self.assertThat(expected, Equals(data)) - def test_read_and_download_mdmf(self): - d = self.do_upload_mdmf() - d.addCallback(self._test_read_and_download, self.data) - return d + async def test_read_and_download_mdmf(self) -> None: + result = await self.do_upload_mdmf() + await self._test_read_and_download(result, self.data) - def test_read_and_download_sdmf(self): - d = self.do_upload_sdmf() - d.addCallback(self._test_read_and_download, self.small_data) - return d + async def test_read_and_download_sdmf(self) -> None: + result = await self.do_upload_sdmf() + await self._test_read_and_download(result, self.small_data) - def test_read_and_download_sdmf_zero_length(self): - d = self.do_upload_empty_sdmf() - d.addCallback(self._test_read_and_download, b"") - return d + async def test_read_and_download_sdmf_zero_length(self) -> None: + result = await self.do_upload_empty_sdmf() + await self._test_read_and_download(result, b"") From e72847115be571559167181d5209fa3dccfbd458 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 30 Nov 2022 09:37:26 -0500 Subject: [PATCH 30/60] news fragment --- newsfragments/3947.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3947.minor diff --git a/newsfragments/3947.minor b/newsfragments/3947.minor new file mode 100644 index 000000000..e69de29bb From 156954c621f7b39406831ca18bed00a2dedf8b70 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 30 Nov 2022 09:43:01 -0500 Subject: [PATCH 31/60] no longer any need to "daisy chain" this value --- src/allmydata/test/mutable/test_version.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/allmydata/test/mutable/test_version.py b/src/allmydata/test/mutable/test_version.py index d14cc9295..1d9467694 100644 --- a/src/allmydata/test/mutable/test_version.py +++ b/src/allmydata/test/mutable/test_version.py @@ -270,7 +270,7 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ async def _test_partial_read(self, node, expected, modes, step) -> None: version = await node.get_best_readable_version() for (name, offset, length) in modes: - version = await self._do_partial_read(version, name, expected, offset, length) + await self._do_partial_read(version, name, expected, offset, length) # then read the whole thing, but only a few bytes at a time, and see # that the results are what we expect. c = consumer.MemoryConsumer() @@ -292,7 +292,6 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ print("got: %s ... %s" % (results[:20], results[-20:])) print("exp: %s ... 
%s" % (expected_range[:20], expected_range[-20:])) self.fail("results[%s] != expected_range" % name) - return version # daisy-chained to next call async def test_partial_read_mdmf_0(self) -> None: data = b"" From 05dfa875a771e6ff27006b8fc13aad3dc1709b67 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 30 Nov 2022 09:46:13 -0500 Subject: [PATCH 32/60] Quite a mypy warning about formatting bytes into a string --- src/allmydata/test/mutable/test_version.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/mutable/test_version.py b/src/allmydata/test/mutable/test_version.py index 1d9467694..87050424b 100644 --- a/src/allmydata/test/mutable/test_version.py +++ b/src/allmydata/test/mutable/test_version.py @@ -289,8 +289,8 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ if results != expected_range: print("read([%d]+%s) got %d bytes, not %d" % \ (offset, length, len(results), len(expected_range))) - print("got: %s ... %s" % (results[:20], results[-20:])) - print("exp: %s ... %s" % (expected_range[:20], expected_range[-20:])) + print("got: %r ... %r" % (results[:20], results[-20:])) + print("exp: %r ... %r" % (expected_range[:20], expected_range[-20:])) self.fail("results[%s] != expected_range" % name) async def test_partial_read_mdmf_0(self) -> None: From 1436eb0fb689d5f230e3cdd44b41579d152d26ad Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 1 Dec 2022 14:26:41 -0500 Subject: [PATCH 33/60] Better explanation. --- newsfragments/3939.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newsfragments/3939.bugfix b/newsfragments/3939.bugfix index 61fb4244a..9d2071d32 100644 --- a/newsfragments/3939.bugfix +++ b/newsfragments/3939.bugfix @@ -1 +1 @@ -Uploading immutables will now use more bandwidth, which should allow for faster uploads in many cases. \ No newline at end of file +Uploading immutables will now better use available bandwidth, which should allow for faster uploads in many cases. \ No newline at end of file From 17dfda6b5aef1bcbb9b5baea2c0785fef6f833ca Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 1 Dec 2022 14:42:52 -0500 Subject: [PATCH 34/60] More direct API. --- src/allmydata/immutable/layout.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 562ca4470..6e8cfe1d8 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -121,7 +121,7 @@ class _WriteBuffer: and do a real write. """ self._to_write.write(data) - return len(self._to_write.getbuffer()) >= self._batch_size + return self._to_write.tell() >= self._batch_size def flush(self) -> tuple[int, bytes]: """Return offset and data to be written.""" @@ -133,7 +133,7 @@ class _WriteBuffer: def get_total_bytes_queued(self) -> int: """Return how many bytes were written or queued in total.""" - return self._written_bytes + len(self._to_write.getbuffer()) + return self._written_bytes + self._to_write.tell() @implementer(IStorageBucketWriter) From d4c202307caff5d4cb580421de4ce44d389c9193 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 1 Dec 2022 14:43:49 -0500 Subject: [PATCH 35/60] Better method name. 
--- src/allmydata/immutable/layout.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 6e8cfe1d8..3321ca0b6 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -131,7 +131,7 @@ class _WriteBuffer: self._to_write = BytesIO() return (offset, data) - def get_total_bytes_queued(self) -> int: + def get_total_bytes(self) -> int: """Return how many bytes were written or queued in total.""" return self._written_bytes + self._to_write.tell() @@ -289,7 +289,7 @@ class WriteBucketProxy(object): no holes. As such, the offset is technically unnecessary, but is used to check the inputs. Possibly we should get rid of it. """ - assert offset == self._write_buffer.get_total_bytes_queued() + assert offset == self._write_buffer.get_total_bytes() if self._write_buffer.queue_write(data): return self._actually_write() else: @@ -301,7 +301,7 @@ class WriteBucketProxy(object): return self._rref.callRemote("write", offset, data) def close(self): - assert self._write_buffer.get_total_bytes_queued() == self.get_allocated_size(), ( + assert self._write_buffer.get_total_bytes() == self.get_allocated_size(), ( f"{self._written_buffer.get_total_bytes_queued()} != {self.get_allocated_size()}" ) d = self._actually_write() From 0ba58070cdf0e463c4731da46d64eddc27bd26d9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 1 Dec 2022 14:45:39 -0500 Subject: [PATCH 36/60] Tweaks. --- src/allmydata/immutable/encode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 2b6602773..2414527ff 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -262,8 +262,8 @@ class Encoder(object): d.addCallback(lambda res: self.finish_hashing()) - # These calls have to happen in order, and waiting for previous one - # also ensures backpressure: + # These calls have to happen in order; layout.py now requires writes to + # be appended to the data written so far. d.addCallback(lambda res: self.send_crypttext_hash_tree_to_all_shareholders()) d.addCallback(lambda res: self.send_all_block_hash_trees()) From 8ed333b171f1a99da60948ddbd2eee84c93b0d78 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 1 Dec 2022 14:45:45 -0500 Subject: [PATCH 37/60] Correct explanation. --- src/allmydata/immutable/layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 3321ca0b6..f86590057 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -162,7 +162,7 @@ class WriteBucketProxy(object): self._create_offsets(block_size, data_size) - # With a ~1MB batch size, max upload speed is 1MB * round-trip latency + # With a ~1MB batch size, max upload speed is 1MB/(round-trip latency) # assuming the writing code waits for writes to finish, so 20MB/sec if # latency is 50ms. In the US many people only have 1MB/sec upload speed # as of 2022 (standard Comcast). For further discussion of how one From c93ff23da7479defe3f05f8ef622ee68be1bb568 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 1 Dec 2022 14:54:28 -0500 Subject: [PATCH 38/60] Don't send empty string writes. 
--- src/allmydata/immutable/layout.py | 14 +++++++++++--- src/allmydata/test/test_storage.py | 2 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index f86590057..9154f2f30 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -121,7 +121,7 @@ class _WriteBuffer: and do a real write. """ self._to_write.write(data) - return self._to_write.tell() >= self._batch_size + return self.get_queued_bytes() >= self._batch_size def flush(self) -> tuple[int, bytes]: """Return offset and data to be written.""" @@ -131,9 +131,13 @@ class _WriteBuffer: self._to_write = BytesIO() return (offset, data) + def get_queued_bytes(self) -> int: + """Return number of queued, unwritten bytes.""" + return self._to_write.tell() + def get_total_bytes(self) -> int: """Return how many bytes were written or queued in total.""" - return self._written_bytes + self._to_write.tell() + return self._written_bytes + self.get_queued_bytes() @implementer(IStorageBucketWriter) @@ -304,7 +308,11 @@ class WriteBucketProxy(object): assert self._write_buffer.get_total_bytes() == self.get_allocated_size(), ( f"{self._written_buffer.get_total_bytes_queued()} != {self.get_allocated_size()}" ) - d = self._actually_write() + if self._write_buffer.get_queued_bytes() > 0: + d = self._actually_write() + else: + # No data queued, don't send empty string write. + d = defer.succeed(True) d.addCallback(lambda _: self._rref.callRemote("close")) return d diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 820d4fd79..9b9d2d8de 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -3770,7 +3770,9 @@ class WriteBufferTests(SyncTestCase): result += flushed_data # Final flush: + remaining_length = wb.get_queued_bytes() flushed_offset, flushed_data = wb.flush() + self.assertEqual(remaining_length, len(flushed_data)) self.assertEqual(flushed_offset, len(result)) result += flushed_data From 1eb4a4adf8ba6b6f3c9ac236753a020428e424c5 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Dec 2022 16:47:24 -0700 Subject: [PATCH 39/60] Update newsfragments/3921.feature Co-authored-by: Jean-Paul Calderone --- newsfragments/3921.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newsfragments/3921.feature b/newsfragments/3921.feature index f2c3a98bd..798aee817 100644 --- a/newsfragments/3921.feature +++ b/newsfragments/3921.feature @@ -1,4 +1,4 @@ -Automatically exit when stdin is closed +`tahoe run ...` will now exit when its stdin is closed. This facilitates subprocess management, specifically cleanup. When a parent process is running tahoe and exits without time to do "proper" cleanup at least the stdin descriptor will be closed. From 7ffcfcdb67213ef9002279f2a55b57a2789fa12d Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Dec 2022 16:47:40 -0700 Subject: [PATCH 40/60] Update src/allmydata/test/test_runner.py Co-authored-by: Jean-Paul Calderone --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index bce5b3c20..9830487f3 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -661,7 +661,7 @@ class OnStdinCloseTests(SyncTestCase): def test_exception_ignored(self): """ - an exception from or on-close function is ignored + An exception from our on-close function is discarded. 
""" reactor = MemoryReactorClock() called = [] From 3d43cbccc9ef24fb3168e89eb63a93d682584417 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Dec 2022 17:01:38 -0700 Subject: [PATCH 41/60] log less-specific failures --- src/allmydata/scripts/tahoe_run.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index 49507765e..aaf234b61 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -21,9 +21,11 @@ from twisted.scripts import twistd from twisted.python import usage from twisted.python.filepath import FilePath from twisted.python.reflect import namedAny +from twisted.python.failure import Failure from twisted.internet.defer import maybeDeferred, Deferred from twisted.internet.protocol import Protocol from twisted.internet.stdio import StandardIO +from twisted.internet.error import ReactorNotRunning from twisted.application.service import Service from allmydata.scripts.default_nodedir import _default_nodedir @@ -238,12 +240,15 @@ def on_stdin_close(reactor, fn): def on_close(arg): try: fn() + except ReactorNotRunning: + pass except Exception: - # for our "exit" use-case, this will _mostly_ just be + # for our "exit" use-case failures will _mostly_ just be # ReactorNotRunning (because we're already shutting down # when our stdin closes) but no matter what "bad thing" - # happens we just want to ignore it. - pass + # happens we just want to ignore it .. although other + # errors might be interesting so we'll log those + print(Failure()) return arg when_closed_d.addBoth(on_close) From 36ed554627dc393172d8532b6a66c1bcdbb22334 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Dec 2022 17:03:48 -0700 Subject: [PATCH 42/60] proto -> transport --- src/allmydata/test/test_runner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 52ec84ae6..cd09e5aa0 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -644,15 +644,15 @@ class OnStdinCloseTests(SyncTestCase): def onclose(): called.append(True) - proto = on_stdin_close(reactor, onclose) + transport = on_stdin_close(reactor, onclose) self.assertEqual(called, []) # on Unix we can just close all the readers, correctly # "simulating" a stdin close .. 
of course, Windows has to be # difficult if platform.isWindows(): - proto.writeConnectionLost() - proto.readConnectionLost() + transport.writeConnectionLost() + transport.readConnectionLost() else: for reader in reactor.getReaders(): reader.loseConnection() @@ -670,12 +670,12 @@ class OnStdinCloseTests(SyncTestCase): def onclose(): called.append(True) raise RuntimeError("unexpected error") - proto = on_stdin_close(reactor, onclose) + transport = on_stdin_close(reactor, onclose) self.assertEqual(called, []) if platform.isWindows(): - proto.writeConnectionLost() - proto.readConnectionLost() + transport.writeConnectionLost() + transport.readConnectionLost() else: for reader in reactor.getReaders(): reader.loseConnection() From 20b3594d128e4a90b56ef516a713d44156d17122 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Dec 2022 17:05:58 -0700 Subject: [PATCH 43/60] exarkun wants a helper --- src/allmydata/test/test_runner.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index cd09e5aa0..a84fa28f8 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -630,6 +630,15 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): yield client_running +def _simulate_windows_stdin_close(stdio): + """ + on Unix we can just close all the readers, correctly "simulating" + a stdin close .. of course, Windows has to be difficult + """ + stdio.writeConnectionLost() + stdio.readConnectionLost() + + class OnStdinCloseTests(SyncTestCase): """ Tests for on_stdin_close @@ -647,12 +656,8 @@ class OnStdinCloseTests(SyncTestCase): transport = on_stdin_close(reactor, onclose) self.assertEqual(called, []) - # on Unix we can just close all the readers, correctly - # "simulating" a stdin close .. 
of course, Windows has to be - # difficult if platform.isWindows(): - transport.writeConnectionLost() - transport.readConnectionLost() + _simulate_windows_stdin_close(transport) else: for reader in reactor.getReaders(): reader.loseConnection() @@ -674,8 +679,7 @@ class OnStdinCloseTests(SyncTestCase): self.assertEqual(called, []) if platform.isWindows(): - transport.writeConnectionLost() - transport.readConnectionLost() + _simulate_windows_stdin_close(transport) else: for reader in reactor.getReaders(): reader.loseConnection() From 89b6a008d2277e17832acf3afb16c6e71f88715c Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Dec 2022 23:24:24 -0700 Subject: [PATCH 44/60] since 'coverage report' is what fails with disk-space on windows, try turning it off --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index fc95a0469..9b6dc8756 100644 --- a/tox.ini +++ b/tox.ini @@ -86,7 +86,7 @@ commands = coverage: python -b -m coverage run -m twisted.trial {env:TAHOE_LAFS_TRIAL_ARGS:--rterrors --reporter=timing} {posargs:{env:TEST_SUITE}} coverage: coverage combine coverage: coverage xml - coverage: coverage report +## coverage: coverage report [testenv:integration] basepython = python3 @@ -99,7 +99,7 @@ commands = # NOTE: 'run with "py.test --keep-tempdir -s -v integration/" to debug failures' py.test --timeout=1800 --coverage -s -v {posargs:integration} coverage combine - coverage report +## coverage report [testenv:codechecks] From 3d831f653ba20286ea94e512cf0e87882cbc9e26 Mon Sep 17 00:00:00 2001 From: meejah Date: Thu, 1 Dec 2022 23:58:53 -0700 Subject: [PATCH 45/60] cleanup --- .github/workflows/ci.yml | 2 +- tox.ini | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 163266613..15e7d8fa4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: python-version: "pypy-3.7" - os: ubuntu-latest python-version: "pypy-3.8" - + steps: # See https://github.com/actions/checkout. A fetch-depth of 0 # fetches all tags and branches. 
diff --git a/tox.ini b/tox.ini index 9b6dc8756..db4748033 100644 --- a/tox.ini +++ b/tox.ini @@ -86,7 +86,6 @@ commands = coverage: python -b -m coverage run -m twisted.trial {env:TAHOE_LAFS_TRIAL_ARGS:--rterrors --reporter=timing} {posargs:{env:TEST_SUITE}} coverage: coverage combine coverage: coverage xml -## coverage: coverage report [testenv:integration] basepython = python3 @@ -99,7 +98,6 @@ commands = # NOTE: 'run with "py.test --keep-tempdir -s -v integration/" to debug failures' py.test --timeout=1800 --coverage -s -v {posargs:integration} coverage combine -## coverage report [testenv:codechecks] From 9619e286f4e0f50c20975f5789418eed09f4e350 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 2 Dec 2022 08:16:02 -0500 Subject: [PATCH 46/60] Switch the web testing double to BytesKeyDict This will catch more str/bytes errors by default than `dict` --- src/allmydata/testing/web.py | 3 ++- src/allmydata/util/dictutil.py | 34 +++++++--------------------------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/src/allmydata/testing/web.py b/src/allmydata/testing/web.py index 6538dc3a4..a687e5480 100644 --- a/src/allmydata/testing/web.py +++ b/src/allmydata/testing/web.py @@ -45,6 +45,7 @@ import allmydata.uri from allmydata.util import ( base32, ) +from ..util.dictutil import BytesKeyDict __all__ = ( @@ -138,7 +139,7 @@ class _FakeTahoeUriHandler(Resource, object): isLeaf = True - data: Dict[bytes, bytes] = attr.ib(default=attr.Factory(dict)) + data: BytesKeyDict[bytes, bytes] = attr.ib(default=attr.Factory(BytesKeyDict)) capability_generators = attr.ib(default=attr.Factory(dict)) def _generate_capability(self, kind): diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index 5971d26f6..0a7df0a38 100644 --- a/src/allmydata/util/dictutil.py +++ b/src/allmydata/util/dictutil.py @@ -1,21 +1,6 @@ """ Tools to mess with dicts. - -Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - # IMPORTANT: We deliberately don't import dict. The issue is that we're - # subclassing dict, so we'd end up exposing Python 3 dict APIs to lots of - # code that doesn't support it. - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, list, object, range, str, max, min # noqa: F401 -from six import ensure_str - class DictOfSets(dict): def add(self, key, value): @@ -104,7 +89,7 @@ def _make_enforcing_override(K, method_name): raise TypeError("{} must be of type {}".format( repr(key), self.KEY_TYPE)) return getattr(dict, method_name)(self, key, *args, **kwargs) - f.__name__ = ensure_str(method_name) + f.__name__ = method_name setattr(K, method_name, f) for _method_name in ["__setitem__", "__getitem__", "setdefault", "get", @@ -113,18 +98,13 @@ for _method_name in ["__setitem__", "__getitem__", "setdefault", "get", del _method_name -if PY2: - # No need for enforcement, can use either bytes or unicode as keys and it's - # fine. 
- BytesKeyDict = UnicodeKeyDict = dict -else: - class BytesKeyDict(_TypedKeyDict): - """Keys should be bytes.""" +class BytesKeyDict(_TypedKeyDict): + """Keys should be bytes.""" - KEY_TYPE = bytes + KEY_TYPE = bytes - class UnicodeKeyDict(_TypedKeyDict): - """Keys should be unicode strings.""" +class UnicodeKeyDict(_TypedKeyDict): + """Keys should be unicode strings.""" - KEY_TYPE = str + KEY_TYPE = str From a84b278ecdaf3263030ad9817961d70cdfdfd741 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 2 Dec 2022 08:26:15 -0500 Subject: [PATCH 47/60] support older pythons --- src/allmydata/testing/web.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/testing/web.py b/src/allmydata/testing/web.py index a687e5480..be7878d57 100644 --- a/src/allmydata/testing/web.py +++ b/src/allmydata/testing/web.py @@ -11,6 +11,8 @@ Test-helpers for clients that use the WebUI. """ +from __future__ import annotations + import hashlib from typing import Dict From b40d882fceb20fa102ca32540819555a08b2fdf1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 2 Dec 2022 08:28:22 -0500 Subject: [PATCH 48/60] remove unused import --- src/allmydata/testing/web.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/testing/web.py b/src/allmydata/testing/web.py index be7878d57..4af2603a8 100644 --- a/src/allmydata/testing/web.py +++ b/src/allmydata/testing/web.py @@ -14,7 +14,6 @@ Test-helpers for clients that use the WebUI. from __future__ import annotations import hashlib -from typing import Dict import attr From c6cc3708f45c9e581a719852d4f1082528941701 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 2 Dec 2022 08:38:46 -0500 Subject: [PATCH 49/60] Fixup the annotations a bit --- src/allmydata/testing/web.py | 2 +- src/allmydata/util/dictutil.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/allmydata/testing/web.py b/src/allmydata/testing/web.py index 4af2603a8..72ecd7161 100644 --- a/src/allmydata/testing/web.py +++ b/src/allmydata/testing/web.py @@ -140,7 +140,7 @@ class _FakeTahoeUriHandler(Resource, object): isLeaf = True - data: BytesKeyDict[bytes, bytes] = attr.ib(default=attr.Factory(BytesKeyDict)) + data: BytesKeyDict[bytes] = attr.ib(default=attr.Factory(BytesKeyDict)) capability_generators = attr.ib(default=attr.Factory(dict)) def _generate_capability(self, kind): diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index 0a7df0a38..c436ab963 100644 --- a/src/allmydata/util/dictutil.py +++ b/src/allmydata/util/dictutil.py @@ -2,6 +2,10 @@ Tools to mess with dicts. """ +from __future__ import annotations + +from typing import TypeVar, Type + class DictOfSets(dict): def add(self, key, value): if key in self: @@ -64,7 +68,10 @@ class AuxValueDict(dict): self.auxilliary[key] = auxilliary -class _TypedKeyDict(dict): +K = TypeVar("K") +V = TypeVar("V") + +class _TypedKeyDict(dict[K, V]): """Dictionary that enforces key type. Doesn't override everything, but probably good enough to catch most @@ -73,7 +80,7 @@ class _TypedKeyDict(dict): Subclass and override KEY_TYPE. 
""" - KEY_TYPE = object + KEY_TYPE: Type[K] def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) @@ -98,13 +105,13 @@ for _method_name in ["__setitem__", "__getitem__", "setdefault", "get", del _method_name -class BytesKeyDict(_TypedKeyDict): +class BytesKeyDict(_TypedKeyDict[bytes, V]): """Keys should be bytes.""" KEY_TYPE = bytes -class UnicodeKeyDict(_TypedKeyDict): +class UnicodeKeyDict(_TypedKeyDict[str, V]): """Keys should be unicode strings.""" KEY_TYPE = str From c542b8463707cd5720822de260f79b61c7582994 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 2 Dec 2022 08:47:07 -0500 Subject: [PATCH 50/60] remove the annotations everything is broken on older pythons --- src/allmydata/testing/web.py | 2 +- src/allmydata/util/dictutil.py | 15 ++++----------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/allmydata/testing/web.py b/src/allmydata/testing/web.py index 72ecd7161..4f68b3774 100644 --- a/src/allmydata/testing/web.py +++ b/src/allmydata/testing/web.py @@ -140,7 +140,7 @@ class _FakeTahoeUriHandler(Resource, object): isLeaf = True - data: BytesKeyDict[bytes] = attr.ib(default=attr.Factory(BytesKeyDict)) + data: BytesKeyDict = attr.ib(default=attr.Factory(BytesKeyDict)) capability_generators = attr.ib(default=attr.Factory(dict)) def _generate_capability(self, kind): diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index c436ab963..0a7df0a38 100644 --- a/src/allmydata/util/dictutil.py +++ b/src/allmydata/util/dictutil.py @@ -2,10 +2,6 @@ Tools to mess with dicts. """ -from __future__ import annotations - -from typing import TypeVar, Type - class DictOfSets(dict): def add(self, key, value): if key in self: @@ -68,10 +64,7 @@ class AuxValueDict(dict): self.auxilliary[key] = auxilliary -K = TypeVar("K") -V = TypeVar("V") - -class _TypedKeyDict(dict[K, V]): +class _TypedKeyDict(dict): """Dictionary that enforces key type. Doesn't override everything, but probably good enough to catch most @@ -80,7 +73,7 @@ class _TypedKeyDict(dict[K, V]): Subclass and override KEY_TYPE. """ - KEY_TYPE: Type[K] + KEY_TYPE = object def __init__(self, *args, **kwargs): dict.__init__(self, *args, **kwargs) @@ -105,13 +98,13 @@ for _method_name in ["__setitem__", "__getitem__", "setdefault", "get", del _method_name -class BytesKeyDict(_TypedKeyDict[bytes, V]): +class BytesKeyDict(_TypedKeyDict): """Keys should be bytes.""" KEY_TYPE = bytes -class UnicodeKeyDict(_TypedKeyDict[str, V]): +class UnicodeKeyDict(_TypedKeyDict): """Keys should be unicode strings.""" KEY_TYPE = str From f6a46c86d24b59110d148b2879c2d7e6647d0501 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 9 Dec 2022 14:11:59 -0500 Subject: [PATCH 51/60] Populate the wheelhouse with a working version of tox --- .circleci/populate-wheelhouse.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/populate-wheelhouse.sh b/.circleci/populate-wheelhouse.sh index 519a80cac..39bf4ae4c 100755 --- a/.circleci/populate-wheelhouse.sh +++ b/.circleci/populate-wheelhouse.sh @@ -9,7 +9,7 @@ BASIC_DEPS="pip wheel" # Python packages we need to support the test infrastructure. *Not* packages # Tahoe-LAFS itself (implementation or test suite) need. -TEST_DEPS="tox codecov" +TEST_DEPS="'tox~=3.0' codecov" # Python packages we need to generate test reports for CI infrastructure. # *Not* packages Tahoe-LAFS itself (implement or test suite) need. 
From 13aa000d0b4c42c550e42a7d85ad2a0035b7b56d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 9 Dec 2022 14:06:24 -0500 Subject: [PATCH 52/60] Some features we depend on are broken in tox 4 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 768e44e29..8558abd02 100644 --- a/setup.py +++ b/setup.py @@ -396,7 +396,7 @@ setup(name="tahoe-lafs", # also set in __init__.py "pyflakes == 2.2.0", "coverage ~= 5.0", "mock", - "tox", + "tox ~= 3.0", "pytest", "pytest-twisted", "hypothesis >= 3.6.1", From 666cd24c2b07e5a4ea70100ec3a1554296c47507 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 9 Dec 2022 14:07:38 -0500 Subject: [PATCH 53/60] Also constrain tox here --- .circleci/config.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 051e690b7..d7e4f2563 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -133,10 +133,10 @@ jobs: steps: - "checkout" - - run: + - run: &INSTALL_TOX name: "Install tox" command: | - pip install --user tox + pip install --user 'tox~=3.0' - run: name: "Static-ish code checks" @@ -152,9 +152,7 @@ jobs: - "checkout" - run: - name: "Install tox" - command: | - pip install --user tox + <<: *INSTALL_TOX - run: name: "Make PyInstaller executable" From 43c044a11b8c98565dcc035608ec82a18affcf08 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 9 Dec 2022 14:13:29 -0500 Subject: [PATCH 54/60] build me the images --- .circleci/config.yml | 106 +++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d7e4f2563..89748c5aa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,69 +14,69 @@ version: 2.1 workflows: ci: jobs: - # Start with jobs testing various platforms. - - "debian-10": - {} - - "debian-11": - {} + # # Start with jobs testing various platforms. + # - "debian-10": + # {} + # - "debian-11": + # {} - - "ubuntu-20-04": - {} - - "ubuntu-18-04": - requires: - - "ubuntu-20-04" + # - "ubuntu-20-04": + # {} + # - "ubuntu-18-04": + # requires: + # - "ubuntu-20-04" - # Equivalent to RHEL 8; CentOS 8 is dead. - - "oraclelinux-8": - {} + # # Equivalent to RHEL 8; CentOS 8 is dead. + # - "oraclelinux-8": + # {} - - "nixos": - name: "NixOS 21.05" - nixpkgs: "21.05" + # - "nixos": + # name: "NixOS 21.05" + # nixpkgs: "21.05" - - "nixos": - name: "NixOS 21.11" - nixpkgs: "21.11" + # - "nixos": + # name: "NixOS 21.11" + # nixpkgs: "21.11" - # Eventually, test against PyPy 3.8 - #- "pypy27-buster": - # {} + # # Eventually, test against PyPy 3.8 + # #- "pypy27-buster": + # # {} - # Other assorted tasks and configurations - - "codechecks": - {} - - "pyinstaller": - {} - - "c-locale": - {} - # Any locale other than C or UTF-8. - - "another-locale": - {} + # # Other assorted tasks and configurations + # - "codechecks": + # {} + # - "pyinstaller": + # {} + # - "c-locale": + # {} + # # Any locale other than C or UTF-8. + # - "another-locale": + # {} - - "integration": - requires: - # If the unit test suite doesn't pass, don't bother running the - # integration tests. - - "debian-11" + # - "integration": + # requires: + # # If the unit test suite doesn't pass, don't bother running the + # # integration tests. + # - "debian-11" - - "typechecks": - {} - - "docs": - {} + # - "typechecks": + # {} + # - "docs": + # {} - images: - # Build the Docker images used by the ci jobs. 
This makes the ci jobs - # faster and takes various spurious failures out of the critical path. - triggers: - # Build once a day - - schedule: - cron: "0 0 * * *" - filters: - branches: - only: - - "master" + # images: + # # Build the Docker images used by the ci jobs. This makes the ci jobs + # # faster and takes various spurious failures out of the critical path. + # triggers: + # # Build once a day + # - schedule: + # cron: "0 0 * * *" + # filters: + # branches: + # only: + # - "master" - jobs: + # jobs: # Every job that pushes a Docker image from Docker Hub needs to provide # credentials. Use this first job to define a yaml anchor that can be # used to supply a CircleCI job context which makes Docker Hub From e835ed538fe81876898b36cbccd5fd32bac75554 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 9 Dec 2022 14:18:40 -0500 Subject: [PATCH 55/60] Okay don't quote it then --- .circleci/populate-wheelhouse.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/populate-wheelhouse.sh b/.circleci/populate-wheelhouse.sh index 39bf4ae4c..857171979 100755 --- a/.circleci/populate-wheelhouse.sh +++ b/.circleci/populate-wheelhouse.sh @@ -9,7 +9,7 @@ BASIC_DEPS="pip wheel" # Python packages we need to support the test infrastructure. *Not* packages # Tahoe-LAFS itself (implementation or test suite) need. -TEST_DEPS="'tox~=3.0' codecov" +TEST_DEPS="tox~=3.0 codecov" # Python packages we need to generate test reports for CI infrastructure. # *Not* packages Tahoe-LAFS itself (implement or test suite) need. From d5380fe1569f6548df37002949b0169a55ca4151 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 9 Dec 2022 14:27:37 -0500 Subject: [PATCH 56/60] regular ci config --- .circleci/config.yml | 106 +++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 89748c5aa..d7e4f2563 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,69 +14,69 @@ version: 2.1 workflows: ci: jobs: - # # Start with jobs testing various platforms. - # - "debian-10": - # {} - # - "debian-11": - # {} + # Start with jobs testing various platforms. + - "debian-10": + {} + - "debian-11": + {} - # - "ubuntu-20-04": - # {} - # - "ubuntu-18-04": - # requires: - # - "ubuntu-20-04" + - "ubuntu-20-04": + {} + - "ubuntu-18-04": + requires: + - "ubuntu-20-04" - # # Equivalent to RHEL 8; CentOS 8 is dead. - # - "oraclelinux-8": - # {} + # Equivalent to RHEL 8; CentOS 8 is dead. + - "oraclelinux-8": + {} - # - "nixos": - # name: "NixOS 21.05" - # nixpkgs: "21.05" + - "nixos": + name: "NixOS 21.05" + nixpkgs: "21.05" - # - "nixos": - # name: "NixOS 21.11" - # nixpkgs: "21.11" + - "nixos": + name: "NixOS 21.11" + nixpkgs: "21.11" - # # Eventually, test against PyPy 3.8 - # #- "pypy27-buster": - # # {} + # Eventually, test against PyPy 3.8 + #- "pypy27-buster": + # {} - # # Other assorted tasks and configurations - # - "codechecks": - # {} - # - "pyinstaller": - # {} - # - "c-locale": - # {} - # # Any locale other than C or UTF-8. - # - "another-locale": - # {} + # Other assorted tasks and configurations + - "codechecks": + {} + - "pyinstaller": + {} + - "c-locale": + {} + # Any locale other than C or UTF-8. + - "another-locale": + {} - # - "integration": - # requires: - # # If the unit test suite doesn't pass, don't bother running the - # # integration tests. 
- # - "debian-11" + - "integration": + requires: + # If the unit test suite doesn't pass, don't bother running the + # integration tests. + - "debian-11" - # - "typechecks": - # {} - # - "docs": - # {} + - "typechecks": + {} + - "docs": + {} - # images: - # # Build the Docker images used by the ci jobs. This makes the ci jobs - # # faster and takes various spurious failures out of the critical path. - # triggers: - # # Build once a day - # - schedule: - # cron: "0 0 * * *" - # filters: - # branches: - # only: - # - "master" + images: + # Build the Docker images used by the ci jobs. This makes the ci jobs + # faster and takes various spurious failures out of the critical path. + triggers: + # Build once a day + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - "master" - # jobs: + jobs: # Every job that pushes a Docker image from Docker Hub needs to provide # credentials. Use this first job to define a yaml anchor that can be # used to supply a CircleCI job context which makes Docker Hub From ea0426318ea695def6a66593ae44d372b17d194e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 12 Dec 2022 10:02:43 -0500 Subject: [PATCH 57/60] news fragment --- newsfragments/3950.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3950.minor diff --git a/newsfragments/3950.minor b/newsfragments/3950.minor new file mode 100644 index 000000000..e69de29bb From b8680750daa6af924d4da4549780f9cdbc224605 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 12 Dec 2022 11:47:32 -0500 Subject: [PATCH 58/60] pin it in more places --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15e7d8fa4..c1e0c9391 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,7 +80,7 @@ jobs: - name: Install Python packages run: | - pip install --upgrade codecov tox tox-gh-actions setuptools + pip install --upgrade codecov "tox<4" tox-gh-actions setuptools pip list - name: Display tool versions @@ -199,7 +199,7 @@ jobs: - name: Install Python packages run: | - pip install --upgrade tox + pip install --upgrade "tox<4" pip list - name: Display tool versions @@ -247,7 +247,7 @@ jobs: - name: Install Python packages run: | - pip install --upgrade tox + pip install --upgrade "tox<4" pip list - name: Display tool versions From 8282fce4cdd574ec4b8cc849070ed4ac2ee03cc8 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 13 Dec 2022 08:57:21 -0500 Subject: [PATCH 59/60] build the images again --- .circleci/config.yml | 106 +++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d7e4f2563..89748c5aa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,69 +14,69 @@ version: 2.1 workflows: ci: jobs: - # Start with jobs testing various platforms. - - "debian-10": - {} - - "debian-11": - {} + # # Start with jobs testing various platforms. + # - "debian-10": + # {} + # - "debian-11": + # {} - - "ubuntu-20-04": - {} - - "ubuntu-18-04": - requires: - - "ubuntu-20-04" + # - "ubuntu-20-04": + # {} + # - "ubuntu-18-04": + # requires: + # - "ubuntu-20-04" - # Equivalent to RHEL 8; CentOS 8 is dead. - - "oraclelinux-8": - {} + # # Equivalent to RHEL 8; CentOS 8 is dead. 
+ # - "oraclelinux-8": + # {} - - "nixos": - name: "NixOS 21.05" - nixpkgs: "21.05" + # - "nixos": + # name: "NixOS 21.05" + # nixpkgs: "21.05" - - "nixos": - name: "NixOS 21.11" - nixpkgs: "21.11" + # - "nixos": + # name: "NixOS 21.11" + # nixpkgs: "21.11" - # Eventually, test against PyPy 3.8 - #- "pypy27-buster": - # {} + # # Eventually, test against PyPy 3.8 + # #- "pypy27-buster": + # # {} - # Other assorted tasks and configurations - - "codechecks": - {} - - "pyinstaller": - {} - - "c-locale": - {} - # Any locale other than C or UTF-8. - - "another-locale": - {} + # # Other assorted tasks and configurations + # - "codechecks": + # {} + # - "pyinstaller": + # {} + # - "c-locale": + # {} + # # Any locale other than C or UTF-8. + # - "another-locale": + # {} - - "integration": - requires: - # If the unit test suite doesn't pass, don't bother running the - # integration tests. - - "debian-11" + # - "integration": + # requires: + # # If the unit test suite doesn't pass, don't bother running the + # # integration tests. + # - "debian-11" - - "typechecks": - {} - - "docs": - {} + # - "typechecks": + # {} + # - "docs": + # {} - images: - # Build the Docker images used by the ci jobs. This makes the ci jobs - # faster and takes various spurious failures out of the critical path. - triggers: - # Build once a day - - schedule: - cron: "0 0 * * *" - filters: - branches: - only: - - "master" + # images: + # # Build the Docker images used by the ci jobs. This makes the ci jobs + # # faster and takes various spurious failures out of the critical path. + # triggers: + # # Build once a day + # - schedule: + # cron: "0 0 * * *" + # filters: + # branches: + # only: + # - "master" - jobs: + # jobs: # Every job that pushes a Docker image from Docker Hub needs to provide # credentials. Use this first job to define a yaml anchor that can be # used to supply a CircleCI job context which makes Docker Hub From 815c998c3323829f066752be0f3f707e2716a490 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 13 Dec 2022 09:09:02 -0500 Subject: [PATCH 60/60] regular ci --- .circleci/config.yml | 106 +++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 89748c5aa..d7e4f2563 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,69 +14,69 @@ version: 2.1 workflows: ci: jobs: - # # Start with jobs testing various platforms. - # - "debian-10": - # {} - # - "debian-11": - # {} + # Start with jobs testing various platforms. + - "debian-10": + {} + - "debian-11": + {} - # - "ubuntu-20-04": - # {} - # - "ubuntu-18-04": - # requires: - # - "ubuntu-20-04" + - "ubuntu-20-04": + {} + - "ubuntu-18-04": + requires: + - "ubuntu-20-04" - # # Equivalent to RHEL 8; CentOS 8 is dead. - # - "oraclelinux-8": - # {} + # Equivalent to RHEL 8; CentOS 8 is dead. + - "oraclelinux-8": + {} - # - "nixos": - # name: "NixOS 21.05" - # nixpkgs: "21.05" + - "nixos": + name: "NixOS 21.05" + nixpkgs: "21.05" - # - "nixos": - # name: "NixOS 21.11" - # nixpkgs: "21.11" + - "nixos": + name: "NixOS 21.11" + nixpkgs: "21.11" - # # Eventually, test against PyPy 3.8 - # #- "pypy27-buster": - # # {} + # Eventually, test against PyPy 3.8 + #- "pypy27-buster": + # {} - # # Other assorted tasks and configurations - # - "codechecks": - # {} - # - "pyinstaller": - # {} - # - "c-locale": - # {} - # # Any locale other than C or UTF-8. 
- # - "another-locale": - # {} + # Other assorted tasks and configurations + - "codechecks": + {} + - "pyinstaller": + {} + - "c-locale": + {} + # Any locale other than C or UTF-8. + - "another-locale": + {} - # - "integration": - # requires: - # # If the unit test suite doesn't pass, don't bother running the - # # integration tests. - # - "debian-11" + - "integration": + requires: + # If the unit test suite doesn't pass, don't bother running the + # integration tests. + - "debian-11" - # - "typechecks": - # {} - # - "docs": - # {} + - "typechecks": + {} + - "docs": + {} - # images: - # # Build the Docker images used by the ci jobs. This makes the ci jobs - # # faster and takes various spurious failures out of the critical path. - # triggers: - # # Build once a day - # - schedule: - # cron: "0 0 * * *" - # filters: - # branches: - # only: - # - "master" + images: + # Build the Docker images used by the ci jobs. This makes the ci jobs + # faster and takes various spurious failures out of the critical path. + triggers: + # Build once a day + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - "master" - # jobs: + jobs: # Every job that pushes a Docker image from Docker Hub needs to provide # credentials. Use this first job to define a yaml anchor that can be # used to supply a CircleCI job context which makes Docker Hub