Merge remote-tracking branch 'origin/master' into 3948.static-setup

Commit f766703948 by Jean-Paul Calderone, 2022-12-14 09:00:50 -05:00
19 changed files with 290 additions and 468 deletions


@@ -133,10 +133,10 @@ jobs:
    steps:
      - "checkout"
-     - run:
+     - run: &INSTALL_TOX
          name: "Install tox"
          command: |
-            pip install --user tox
+            pip install --user 'tox~=3.0'

      - run:
          name: "Static-ish code checks"

@@ -152,9 +152,7 @@ jobs:
      - "checkout"
      - run:
-         name: "Install tox"
-         command: |
-           pip install --user tox
+         <<: *INSTALL_TOX

      - run:
          name: "Make PyInstaller executable"


@@ -9,7 +9,7 @@ BASIC_DEPS="pip wheel setuptools setuptools_scm"

# Python packages we need to support the test infrastructure. *Not* packages
# Tahoe-LAFS itself (implementation or test suite) need.
-TEST_DEPS="tox codecov"
+TEST_DEPS="tox~=3.0 codecov"

# Python packages we need to generate test reports for CI infrastructure.
# *Not* packages Tahoe-LAFS itself (implement or test suite) need.


@@ -63,7 +63,7 @@ jobs:
            python-version: "pypy-3.7"
          - os: ubuntu-latest
            python-version: "pypy-3.8"

    steps:
      # See https://github.com/actions/checkout. A fetch-depth of 0
      # fetches all tags and branches.

@@ -80,7 +80,7 @@ jobs:
      - name: Install Python packages
        run: |
-          pip install --upgrade codecov tox tox-gh-actions setuptools
+          pip install --upgrade codecov "tox<4" tox-gh-actions setuptools
          pip list

      - name: Display tool versions

@@ -199,7 +199,7 @@ jobs:
      - name: Install Python packages
        run: |
-          pip install --upgrade tox
+          pip install --upgrade "tox<4"
          pip list

      - name: Display tool versions

@@ -247,7 +247,7 @@ jobs:
      - name: Install Python packages
        run: |
-          pip install --upgrade tox
+          pip install --upgrade "tox<4"
          pip list

      - name: Display tool versions

newsfragments/3874.minor (new, empty file)


@@ -0,0 +1,5 @@
`tahoe run ...` will now exit when its stdin is closed.
This facilitates subprocess management, specifically cleanup.
If a parent process runs tahoe as a subprocess and exits without a chance to do "proper" cleanup, at least the child's stdin descriptor will be closed.
"tahoe run" notices this and exits.
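
As a rough illustration of the intended subprocess-management use (not part of the patch; the node directory path is made up):

import subprocess

# Give the child a pipe for stdin. We never write to it; its only job is
# to be closed by the operating system when this parent process exits.
proc = subprocess.Popen(
    ["tahoe", "run", "/path/to/node-dir"],  # hypothetical node directory
    stdin=subprocess.PIPE,
)

# ... parent does its work ...

# Explicit cleanup when we do get the chance: closing stdin asks the node
# to exit, then we wait for it.
proc.stdin.close()
proc.wait()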


@@ -0,0 +1 @@
Uploading immutables will now better use available bandwidth, which should allow for faster uploads in many cases.

newsfragments/3950.minor (new, empty file)


@@ -166,7 +166,7 @@ test =
    pyflakes == 2.2.0
    coverage ~= 5.0
    mock
-   tox
+   tox ~= 3.0
    pytest
    pytest-twisted
    hypothesis >= 3.6.1


@@ -262,6 +262,8 @@ class Encoder(object):
        d.addCallback(lambda res: self.finish_hashing())

+        # These calls have to happen in order; layout.py now requires writes to
+        # be appended to the data written so far.
        d.addCallback(lambda res:
                      self.send_crypttext_hash_tree_to_all_shareholders())
        d.addCallback(lambda res: self.send_all_block_hash_trees())
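
To make the ordering requirement concrete, here is a sketch of the write order the encoder has to respect, matching the field order laid out in layout.py. ``upload_share`` and its arguments are hypothetical names for illustration, not the encoder's real API:

from twisted.internet import defer

def upload_share(writer, blocks, crypttext_hashes, block_hashes, share_hashes, uri_ext):
    """
    Sketch of the on-disk write order an IStorageBucketWriter-style ``writer``
    now has to see: header, data blocks, crypttext hashes, block hashes,
    share hashes, URI extension, then close.
    """
    d = defer.succeed(None)
    d.addCallback(lambda _: writer.put_header())
    for i, block in enumerate(blocks):
        # Blocks are appended in segment order.
        d.addCallback(lambda _, i=i, block=block: writer.put_block(i, block))
    d.addCallback(lambda _: writer.put_crypttext_hashes(crypttext_hashes))
    d.addCallback(lambda _: writer.put_block_hashes(block_hashes))
    d.addCallback(lambda _: writer.put_share_hashes(share_hashes))
    d.addCallback(lambda _: writer.put_uri_extension(uri_ext))
    d.addCallback(lambda _: writer.close())
    return d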


@@ -1,21 +1,18 @@
"""
Ported to Python 3.
"""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-from future.utils import PY2
-if PY2:
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+from __future__ import annotations

import struct
+from io import BytesIO
+from attrs import define, field

from zope.interface import implementer
from twisted.internet import defer
from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \
     FileTooLargeError, HASH_SIZE
-from allmydata.util import mathutil, observer, pipeline, log
+from allmydata.util import mathutil, observer, log
from allmydata.util.assertutil import precondition
from allmydata.storage.server import si_b2a

@@ -107,19 +104,58 @@ def make_write_bucket_proxy(rref, server,
                             num_share_hashes, uri_extension_size)
    return wbp

+@define
+class _WriteBuffer:
+    """
+    Queue up small writes to be written in a single batched larger write.
+    """
+    _batch_size: int
+    _to_write : BytesIO = field(factory=BytesIO)
+    _written_bytes : int = field(default=0)
+
+    def queue_write(self, data: bytes) -> bool:
+        """
+        Queue a write. If the result is ``False``, no further action is needed
+        for now. If the result is some ``True``, it's time to call ``flush()``
+        and do a real write.
+        """
+        self._to_write.write(data)
+        return self.get_queued_bytes() >= self._batch_size
+
+    def flush(self) -> tuple[int, bytes]:
+        """Return offset and data to be written."""
+        offset = self._written_bytes
+        data = self._to_write.getvalue()
+        self._written_bytes += len(data)
+        self._to_write = BytesIO()
+        return (offset, data)
+
+    def get_queued_bytes(self) -> int:
+        """Return number of queued, unwritten bytes."""
+        return self._to_write.tell()
+
+    def get_total_bytes(self) -> int:
+        """Return how many bytes were written or queued in total."""
+        return self._written_bytes + self.get_queued_bytes()

@implementer(IStorageBucketWriter)
class WriteBucketProxy(object):
+    """
+    Note: The various ``put_`` methods need to be called in the order in which the
+    bytes will get written.
+    """
    fieldsize = 4
    fieldstruct = ">L"

    def __init__(self, rref, server, data_size, block_size, num_segments,
-                 num_share_hashes, uri_extension_size, pipeline_size=50000):
+                 num_share_hashes, uri_extension_size, batch_size=1_000_000):
        self._rref = rref
        self._server = server
        self._data_size = data_size
        self._block_size = block_size
        self._num_segments = num_segments
-        self._written_bytes = 0

        effective_segments = mathutil.next_power_of_k(num_segments,2)
        self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE

@@ -130,11 +166,13 @@ class WriteBucketProxy(object):
        self._create_offsets(block_size, data_size)

-        # k=3, max_segment_size=128KiB gives us a typical segment of 43691
-        # bytes. Setting the default pipeline_size to 50KB lets us get two
-        # segments onto the wire but not a third, which would keep the pipe
-        # filled.
-        self._pipeline = pipeline.Pipeline(pipeline_size)
+        # With a ~1MB batch size, max upload speed is 1MB/(round-trip latency)
+        # assuming the writing code waits for writes to finish, so 20MB/sec if
+        # latency is 50ms. In the US many people only have 1MB/sec upload speed
+        # as of 2022 (standard Comcast). For further discussion of how one
+        # might set batch sizes see
+        # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3787#comment:1.
+        self._write_buffer = _WriteBuffer(batch_size)

    def get_allocated_size(self):
        return (self._offsets['uri_extension'] + self.fieldsize +
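
Spelling out the arithmetic in that comment (the 1 MB batch and 50 ms round trip are the assumptions stated in the comment, not measurements):

# With one batch in flight at a time, throughput is bounded by
# batch_size / round_trip_time.
batch_size = 1_000_000      # bytes, the default above
round_trip_time = 0.05      # seconds, assumed 50ms latency
max_throughput = batch_size / round_trip_time
print(max_throughput / 1_000_000, "MB/s")  # -> 20.0 MB/s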
@@ -179,7 +217,7 @@ class WriteBucketProxy(object):
        return "<WriteBucketProxy for node %r>" % self._server.get_name()

    def put_header(self):
-        return self._write(0, self._offset_data)
+        return self._queue_write(0, self._offset_data)

    def put_block(self, segmentnum, data):
        offset = self._offsets['data'] + segmentnum * self._block_size

@@ -193,13 +231,13 @@ class WriteBucketProxy(object):
                         (self._block_size *
                          (self._num_segments - 1))),
                     len(data), self._block_size)
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

    def put_crypttext_hashes(self, hashes):
        # plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and
        # so is not explicitly written, but we need to write everything, so
        # fill it in with nulls.
-        d = self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size)
+        d = self._queue_write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size)
        d.addCallback(lambda _: self._really_put_crypttext_hashes(hashes))
        return d

@@ -212,7 +250,7 @@ class WriteBucketProxy(object):
        precondition(offset + len(data) <= self._offsets['block_hashes'],
                     offset, len(data), offset+len(data),
                     self._offsets['block_hashes'])
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

    def put_block_hashes(self, blockhashes):
        offset = self._offsets['block_hashes']

@@ -223,7 +261,7 @@ class WriteBucketProxy(object):
        precondition(offset + len(data) <= self._offsets['share_hashes'],
                     offset, len(data), offset+len(data),
                     self._offsets['share_hashes'])
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

    def put_share_hashes(self, sharehashes):
        # sharehashes is a list of (index, hash) tuples, so they get stored

@@ -237,29 +275,45 @@ class WriteBucketProxy(object):
        precondition(offset + len(data) <= self._offsets['uri_extension'],
                     offset, len(data), offset+len(data),
                     self._offsets['uri_extension'])
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

    def put_uri_extension(self, data):
        offset = self._offsets['uri_extension']
        assert isinstance(data, bytes)
        precondition(len(data) == self._uri_extension_size)
        length = struct.pack(self.fieldstruct, len(data))
-        return self._write(offset, length+data)
+        return self._queue_write(offset, length+data)

-    def _write(self, offset, data):
-        # use a Pipeline to pipeline several writes together. TODO: another
-        # speedup would be to coalesce small writes into a single call: this
-        # would reduce the foolscap CPU overhead per share, but wouldn't
-        # reduce the number of round trips, so it might not be worth the
-        # effort.
-        self._written_bytes += len(data)
-        return self._pipeline.add(len(data),
-                                  self._rref.callRemote, "write", offset, data)
+    def _queue_write(self, offset, data):
+        """
+        This queues up small writes to be written in a single batched larger
+        write.
+
+        Callers of this function are expected to queue the data in order, with
+        no holes. As such, the offset is technically unnecessary, but is used
+        to check the inputs. Possibly we should get rid of it.
+        """
+        assert offset == self._write_buffer.get_total_bytes()
+        if self._write_buffer.queue_write(data):
+            return self._actually_write()
+        else:
+            return defer.succeed(False)
+
+    def _actually_write(self):
+        """Write data to the server."""
+        offset, data = self._write_buffer.flush()
+        return self._rref.callRemote("write", offset, data)

    def close(self):
-        assert self._written_bytes == self.get_allocated_size(), f"{self._written_bytes} != {self.get_allocated_size()}"
-        d = self._pipeline.add(0, self._rref.callRemote, "close")
-        d.addCallback(lambda ign: self._pipeline.flush())
+        assert self._write_buffer.get_total_bytes() == self.get_allocated_size(), (
+            f"{self._write_buffer.get_total_bytes()} != {self.get_allocated_size()}"
+        )
+        if self._write_buffer.get_queued_bytes() > 0:
+            d = self._actually_write()
+        else:
+            # No data queued, don't send empty string write.
+            d = defer.succeed(True)
+        d.addCallback(lambda _: self._rref.callRemote("close"))
        return d
def abort(self): def abort(self):
@@ -371,16 +425,16 @@ class ReadBucketProxy(object):
        self._fieldsize = fieldsize
        self._fieldstruct = fieldstruct

-        for field in ( 'data',
+        for field_name in ( 'data',
                       'plaintext_hash_tree', # UNUSED
                       'crypttext_hash_tree',
                       'block_hashes',
                       'share_hashes',
                       'uri_extension',
                       ):
            offset = struct.unpack(fieldstruct, data[x:x+fieldsize])[0]
            x += fieldsize
-            self._offsets[field] = offset
+            self._offsets[field_name] = offset
        return self._offsets

    def _get_block_data(self, unused, blocknum, blocksize, thisblocksize):
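
The ``_WriteBuffer`` added earlier in this file is small enough to exercise directly. A minimal sketch of its contract (not from the patch; the tiny 10-byte batch size is only to make the flush points visible):

from allmydata.immutable.layout import _WriteBuffer

wb = _WriteBuffer(10)

assert wb.queue_write(b"abc") is False      # 3 bytes queued, under the batch size
assert wb.queue_write(b"defghij") is True   # 10 bytes queued, time to flush

offset, data = wb.flush()
assert (offset, data) == (0, b"abcdefghij")

assert wb.queue_write(b"tail") is False
offset, data = wb.flush()                   # a final flush before "close"
assert (offset, data) == (10, b"tail")
assert wb.get_total_bytes() == 14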


@@ -21,7 +21,11 @@ from twisted.scripts import twistd
from twisted.python import usage
from twisted.python.filepath import FilePath
from twisted.python.reflect import namedAny
-from twisted.internet.defer import maybeDeferred
+from twisted.python.failure import Failure
+from twisted.internet.defer import maybeDeferred, Deferred
+from twisted.internet.protocol import Protocol
+from twisted.internet.stdio import StandardIO
+from twisted.internet.error import ReactorNotRunning
from twisted.application.service import Service

from allmydata.scripts.default_nodedir import _default_nodedir

@@ -155,6 +159,8 @@ class DaemonizeTheRealService(Service, HookMixin):

    def startService(self):
+        from twisted.internet import reactor
+
        def start():
            node_to_instance = {
                u"client": lambda: maybeDeferred(namedAny("allmydata.client.create_client"), self.basedir),

@@ -194,12 +200,14 @@ class DaemonizeTheRealService(Service, HookMixin):
            def created(srv):
                srv.setServiceParent(self.parent)
+                # exiting on stdin-closed facilitates cleanup when run
+                # as a subprocess
+                on_stdin_close(reactor, reactor.stop)

            d.addCallback(created)
            d.addErrback(handle_config_error)
            d.addBoth(self._call_hook, 'running')
            return d

-        from twisted.internet import reactor
        reactor.callWhenRunning(start)

@@ -213,6 +221,46 @@ class DaemonizeTahoeNodePlugin(object):
        return DaemonizeTheRealService(self.nodetype, self.basedir, so)

+def on_stdin_close(reactor, fn):
+    """
+    Arrange for the function `fn` to run when our stdin closes
+    """
+    when_closed_d = Deferred()
+
+    class WhenClosed(Protocol):
+        """
+        Notify a Deferred when our connection is lost .. as this is passed
+        to twisted's StandardIO class, it is used to detect our parent
+        going away.
+        """
+
+        def connectionLost(self, reason):
+            when_closed_d.callback(None)
+
+    def on_close(arg):
+        try:
+            fn()
+        except ReactorNotRunning:
+            pass
+        except Exception:
+            # for our "exit" use-case failures will _mostly_ just be
+            # ReactorNotRunning (because we're already shutting down
+            # when our stdin closes) but no matter what "bad thing"
+            # happens we just want to ignore it .. although other
+            # errors might be interesting so we'll log those
+            print(Failure())
+        return arg
+
+    when_closed_d.addBoth(on_close)
+    # we don't need to do anything with this instance because it gets
+    # hooked into the reactor and thus remembered .. but we return it
+    # for Windows testing purposes.
+    return StandardIO(
+        proto=WhenClosed(),
+        reactor=reactor,
+    )

def run(reactor, config, runApp=twistd.runApp):
    """
    Runs a Tahoe-LAFS node in the foreground.


@@ -1,198 +0,0 @@
"""
Tests for allmydata.util.pipeline.
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import gc
from twisted.internet import defer
from twisted.trial import unittest
from twisted.python import log
from twisted.python.failure import Failure
from allmydata.util import pipeline
class Pipeline(unittest.TestCase):
def pause(self, *args, **kwargs):
d = defer.Deferred()
self.calls.append( (d, args, kwargs) )
return d
def failUnlessCallsAre(self, expected):
#print(self.calls)
#print(expected)
self.failUnlessEqual(len(self.calls), len(expected), self.calls)
for i,c in enumerate(self.calls):
self.failUnlessEqual(c[1:], expected[i], str(i))
def test_basic(self):
self.calls = []
finished = []
p = pipeline.Pipeline(100)
d = p.flush() # fires immediately
d.addCallbacks(finished.append, log.err)
self.failUnlessEqual(len(finished), 1)
finished = []
d = p.add(10, self.pause, "one")
# the call should start right away, and our return Deferred should
# fire right away
d.addCallbacks(finished.append, log.err)
self.failUnlessEqual(len(finished), 1)
self.failUnlessEqual(finished[0], None)
self.failUnlessCallsAre([ ( ("one",) , {} ) ])
self.failUnlessEqual(p.gauge, 10)
# pipeline: [one]
finished = []
d = p.add(20, self.pause, "two", kw=2)
# pipeline: [one, two]
# the call and the Deferred should fire right away
d.addCallbacks(finished.append, log.err)
self.failUnlessEqual(len(finished), 1)
self.failUnlessEqual(finished[0], None)
self.failUnlessCallsAre([ ( ("one",) , {} ),
( ("two",) , {"kw": 2} ),
])
self.failUnlessEqual(p.gauge, 30)
self.calls[0][0].callback("one-result")
# pipeline: [two]
self.failUnlessEqual(p.gauge, 20)
finished = []
d = p.add(90, self.pause, "three", "posarg1")
# pipeline: [two, three]
flushed = []
fd = p.flush()
fd.addCallbacks(flushed.append, log.err)
self.failUnlessEqual(flushed, [])
# the call will be made right away, but the return Deferred will not,
# because the pipeline is now full.
d.addCallbacks(finished.append, log.err)
self.failUnlessEqual(len(finished), 0)
self.failUnlessCallsAre([ ( ("one",) , {} ),
( ("two",) , {"kw": 2} ),
( ("three", "posarg1"), {} ),
])
self.failUnlessEqual(p.gauge, 110)
self.failUnlessRaises(pipeline.SingleFileError, p.add, 10, self.pause)
# retiring either call will unblock the pipeline, causing the #3
# Deferred to fire
self.calls[2][0].callback("three-result")
# pipeline: [two]
self.failUnlessEqual(len(finished), 1)
self.failUnlessEqual(finished[0], None)
self.failUnlessEqual(flushed, [])
# retiring call#2 will finally allow the flush() Deferred to fire
self.calls[1][0].callback("two-result")
self.failUnlessEqual(len(flushed), 1)
def test_errors(self):
self.calls = []
p = pipeline.Pipeline(100)
d1 = p.add(200, self.pause, "one")
d2 = p.flush()
finished = []
d1.addBoth(finished.append)
self.failUnlessEqual(finished, [])
flushed = []
d2.addBoth(flushed.append)
self.failUnlessEqual(flushed, [])
self.calls[0][0].errback(ValueError("oops"))
self.failUnlessEqual(len(finished), 1)
f = finished[0]
self.failUnless(isinstance(f, Failure))
self.failUnless(f.check(pipeline.PipelineError))
self.failUnlessIn("PipelineError", str(f.value))
self.failUnlessIn("ValueError", str(f.value))
r = repr(f.value)
self.failUnless("ValueError" in r, r)
f2 = f.value.error
self.failUnless(f2.check(ValueError))
self.failUnlessEqual(len(flushed), 1)
f = flushed[0]
self.failUnless(isinstance(f, Failure))
self.failUnless(f.check(pipeline.PipelineError))
f2 = f.value.error
self.failUnless(f2.check(ValueError))
# now that the pipeline is in the failed state, any new calls will
# fail immediately
d3 = p.add(20, self.pause, "two")
finished = []
d3.addBoth(finished.append)
self.failUnlessEqual(len(finished), 1)
f = finished[0]
self.failUnless(isinstance(f, Failure))
self.failUnless(f.check(pipeline.PipelineError))
r = repr(f.value)
self.failUnless("ValueError" in r, r)
f2 = f.value.error
self.failUnless(f2.check(ValueError))
d4 = p.flush()
flushed = []
d4.addBoth(flushed.append)
self.failUnlessEqual(len(flushed), 1)
f = flushed[0]
self.failUnless(isinstance(f, Failure))
self.failUnless(f.check(pipeline.PipelineError))
f2 = f.value.error
self.failUnless(f2.check(ValueError))
def test_errors2(self):
self.calls = []
p = pipeline.Pipeline(100)
d1 = p.add(10, self.pause, "one")
d2 = p.add(20, self.pause, "two")
d3 = p.add(30, self.pause, "three")
d4 = p.flush()
# one call fails, then the second one succeeds: make sure
# ExpandableDeferredList tolerates the second one
flushed = []
d4.addBoth(flushed.append)
self.failUnlessEqual(flushed, [])
self.calls[0][0].errback(ValueError("oops"))
self.failUnlessEqual(len(flushed), 1)
f = flushed[0]
self.failUnless(isinstance(f, Failure))
self.failUnless(f.check(pipeline.PipelineError))
f2 = f.value.error
self.failUnless(f2.check(ValueError))
self.calls[1][0].callback("two-result")
self.calls[2][0].errback(ValueError("three-error"))
del d1,d2,d3,d4
gc.collect() # for PyPy


@@ -47,6 +47,9 @@ from twisted.internet.defer import (
    inlineCallbacks,
    DeferredList,
)
+from twisted.internet.testing import (
+    MemoryReactorClock,
+)
from twisted.python.filepath import FilePath

from allmydata.util import fileutil, pollmixin
from allmydata.util.encodingutil import unicode_to_argv

@@ -60,6 +63,9 @@ import allmydata
from allmydata.scripts.runner import (
    parse_options,
)
+from allmydata.scripts.tahoe_run import (
+    on_stdin_close,
+)

from .common import (
    PIPE,

@@ -624,6 +630,64 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin):
        yield client_running

+def _simulate_windows_stdin_close(stdio):
+    """
+    on Unix we can just close all the readers, correctly "simulating"
+    a stdin close .. of course, Windows has to be difficult
+    """
+    stdio.writeConnectionLost()
+    stdio.readConnectionLost()
+
+
+class OnStdinCloseTests(SyncTestCase):
+    """
+    Tests for on_stdin_close
+    """
+
+    def test_close_called(self):
+        """
+        our on-close method is called when stdin closes
+        """
+        reactor = MemoryReactorClock()
+        called = []
+
+        def onclose():
+            called.append(True)
+        transport = on_stdin_close(reactor, onclose)
+        self.assertEqual(called, [])
+
+        if platform.isWindows():
+            _simulate_windows_stdin_close(transport)
+        else:
+            for reader in reactor.getReaders():
+                reader.loseConnection()
+            reactor.advance(1)  # ProcessReader does a callLater(0, ..)
+
+        self.assertEqual(called, [True])
+
+    def test_exception_ignored(self):
+        """
+        An exception from our on-close function is discarded.
+        """
+        reactor = MemoryReactorClock()
+        called = []
+
+        def onclose():
+            called.append(True)
+            raise RuntimeError("unexpected error")
+        transport = on_stdin_close(reactor, onclose)
+        self.assertEqual(called, [])
+
+        if platform.isWindows():
+            _simulate_windows_stdin_close(transport)
+        else:
+            for reader in reactor.getReaders():
+                reader.loseConnection()
+            reactor.advance(1)  # ProcessReader does a callLater(0, ..)
+
+        self.assertEqual(called, [True])

class PidFileLocking(SyncTestCase):
    """
    Direct tests for allmydata.util.pid functions


@@ -3,14 +3,9 @@ Tests for allmydata.storage.

Ported to Python 3.
"""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-from future.utils import native_str, PY2, bytes_to_native_str, bchr
-if PY2:
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+from __future__ import annotations
+from future.utils import native_str, bytes_to_native_str, bchr
from six import ensure_str

from io import (

@@ -59,7 +54,7 @@ from allmydata.storage.common import storage_index_to_dir, \
     si_b2a, si_a2b
from allmydata.storage.lease import LeaseInfo
from allmydata.immutable.layout import WriteBucketProxy, WriteBucketProxy_v2, \
-     ReadBucketProxy
+     ReadBucketProxy, _WriteBuffer
from allmydata.mutable.layout import MDMFSlotWriteProxy, MDMFSlotReadProxy, \
     LayoutInvalid, MDMFSIGNABLEHEADER, \
     SIGNED_PREFIX, MDMFHEADER, \

@@ -3746,3 +3741,39 @@ class LeaseInfoTests(SyncTestCase):
            info.to_mutable_data(),
            HasLength(info.mutable_size()),
        )

+
+class WriteBufferTests(SyncTestCase):
+    """Tests for ``_WriteBuffer``."""
+
+    @given(
+        small_writes=strategies.lists(
+            strategies.binary(min_size=1, max_size=20),
+            min_size=10, max_size=20),
+        batch_size=strategies.integers(min_value=5, max_value=10)
+    )
+    def test_write_buffer(self, small_writes: list[bytes], batch_size: int):
+        """
+        ``_WriteBuffer`` coalesces small writes into bigger writes based on
+        the batch size.
+        """
+        wb = _WriteBuffer(batch_size)
+        result = b""
+        for data in small_writes:
+            should_flush = wb.queue_write(data)
+            if should_flush:
+                flushed_offset, flushed_data = wb.flush()
+                self.assertEqual(flushed_offset, len(result))
+                # The flushed data is in batch sizes, or closest approximation
+                # given queued inputs:
+                self.assertTrue(batch_size <= len(flushed_data) < batch_size + len(data))
+                result += flushed_data
+
+        # Final flush:
+        remaining_length = wb.get_queued_bytes()
+        flushed_offset, flushed_data = wb.flush()
+        self.assertEqual(remaining_length, len(flushed_data))
+        self.assertEqual(flushed_offset, len(result))
+        result += flushed_data
+
+        self.assertEqual(result, b"".join(small_writes))


@@ -9,18 +9,7 @@
"""
Tests for the allmydata.testing helpers
-Ported to Python 3.
"""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-from future.utils import PY2
-if PY2:
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401

from twisted.internet.defer import (
    inlineCallbacks,

@@ -56,10 +45,12 @@ from testtools.matchers import (
    IsInstance,
    MatchesStructure,
    AfterPreprocessing,
+    Contains,
)
from testtools.twistedsupport import (
    succeeded,
)
+from twisted.web.http import GONE


class FakeWebTest(SyncTestCase):

@@ -144,7 +135,8 @@ class FakeWebTest(SyncTestCase):
    def test_download_missing(self):
        """
-        Error if we download a capability that doesn't exist
+        The response to a request to download a capability that doesn't exist
+        is 410 (GONE).
        """

        http_client = create_tahoe_treq_client()

@@ -157,7 +149,11 @@ class FakeWebTest(SyncTestCase):
            resp,
            succeeded(
                MatchesStructure(
-                    code=Equals(500)
+                    code=Equals(GONE),
+                    content=AfterPreprocessing(
+                        lambda m: m(),
+                        succeeded(Contains(b"No data for")),
+                    ),
                )
            )
        )


@@ -6,20 +6,12 @@
# This file is part of Tahoe-LAFS.
#
# See the docs/about.rst file for licensing information.

-"""Test-helpers for clients that use the WebUI.
-Ported to Python 3.
-"""
+"""
+Test-helpers for clients that use the WebUI.
+"""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-from future.utils import PY2
-if PY2:
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+from __future__ import annotations

import hashlib

@@ -54,6 +46,7 @@ import allmydata.uri
from allmydata.util import (
    base32,
)
+from ..util.dictutil import BytesKeyDict


__all__ = (

@@ -147,7 +140,7 @@ class _FakeTahoeUriHandler(Resource, object):

    isLeaf = True

-    data = attr.ib(default=attr.Factory(dict))
+    data: BytesKeyDict = attr.ib(default=attr.Factory(BytesKeyDict))
    capability_generators = attr.ib(default=attr.Factory(dict))

    def _generate_capability(self, kind):

@@ -209,7 +202,7 @@ class _FakeTahoeUriHandler(Resource, object):
        capability = None
        for arg, value in uri.query:
            if arg == u"uri":
-                capability = value
+                capability = value.encode("utf-8")

        # it's legal to use the form "/uri/<capability>"
        if capability is None and request.postpath and request.postpath[0]:
            capability = request.postpath[0]

@@ -221,10 +214,9 @@ class _FakeTahoeUriHandler(Resource, object):
        # the user gave us a capability; if our Grid doesn't have any
        # data for it, that's an error.
-        capability = capability.encode('ascii')
        if capability not in self.data:
-            request.setResponseCode(http.BAD_REQUEST)
-            return u"No data for '{}'".format(capability.decode('ascii'))
+            request.setResponseCode(http.GONE)
+            return u"No data for '{}'".format(capability.decode('ascii')).encode("utf-8")

        return self.data[capability]


@@ -1,21 +1,6 @@
"""
Tools to mess with dicts.
-
-Ported to Python 3.
"""

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-from future.utils import PY2
-if PY2:
-    # IMPORTANT: We deliberately don't import dict. The issue is that we're
-    # subclassing dict, so we'd end up exposing Python 3 dict APIs to lots of
-    # code that doesn't support it.
-    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, list, object, range, str, max, min  # noqa: F401
-from six import ensure_str


class DictOfSets(dict):
    def add(self, key, value):

@@ -104,7 +89,7 @@ def _make_enforcing_override(K, method_name):
            raise TypeError("{} must be of type {}".format(
                repr(key), self.KEY_TYPE))
        return getattr(dict, method_name)(self, key, *args, **kwargs)
-    f.__name__ = ensure_str(method_name)
+    f.__name__ = method_name
    setattr(K, method_name, f)

for _method_name in ["__setitem__", "__getitem__", "setdefault", "get",

@@ -113,18 +98,13 @@ for _method_name in ["__setitem__", "__getitem__", "setdefault", "get",
del _method_name


-if PY2:
-    # No need for enforcement, can use either bytes or unicode as keys and it's
-    # fine.
-    BytesKeyDict = UnicodeKeyDict = dict
-else:
-    class BytesKeyDict(_TypedKeyDict):
-        """Keys should be bytes."""
+class BytesKeyDict(_TypedKeyDict):
+    """Keys should be bytes."""

    KEY_TYPE = bytes


class UnicodeKeyDict(_TypedKeyDict):
    """Keys should be unicode strings."""

    KEY_TYPE = str
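
With the PY2 fallback gone, ``BytesKeyDict`` now always enforces its key type. A small sketch of that behavior (the exact error text is an assumption based on the ``TypeError`` format string above):

from allmydata.util.dictutil import BytesKeyDict

d = BytesKeyDict()
d[b"ok"] = 1          # bytes keys are accepted
try:
    d["nope"] = 2     # a str key is rejected by the enforcing overrides
except TypeError as e:
    print(e)          # e.g. "'nope' must be of type <class 'bytes'>"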


@@ -1,149 +0,0 @@
"""
A pipeline of Deferreds.
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from twisted.internet import defer
from twisted.python.failure import Failure
from twisted.python import log
from allmydata.util.assertutil import precondition
class PipelineError(Exception):
"""One of the pipelined messages returned an error. The received Failure
object is stored in my .error attribute."""
def __init__(self, error):
self.error = error
def __repr__(self):
return "<PipelineError error=(%r)>" % (self.error,)
def __str__(self):
return "<PipelineError error=(%s)>" % (self.error,)
class SingleFileError(Exception):
"""You are not permitted to add a job to a full pipeline."""
class ExpandableDeferredList(defer.Deferred, object):
# like DeferredList(fireOnOneErrback=True) with a built-in
# gatherResults(), but you can add new Deferreds until you close it. This
# gives you a chance to add don't-complain-about-unhandled-error errbacks
# immediately after attachment, regardless of whether you actually end up
# wanting the list or not.
def __init__(self):
defer.Deferred.__init__(self)
self.resultsReceived = 0
self.resultList = []
self.failure = None
self.closed = False
def addDeferred(self, d):
precondition(not self.closed, "don't call addDeferred() on a closed ExpandableDeferredList")
index = len(self.resultList)
self.resultList.append(None)
d.addCallbacks(self._cbDeferred, self._ebDeferred,
callbackArgs=(index,))
return d
def close(self):
self.closed = True
self.checkForFinished()
def checkForFinished(self):
if not self.closed:
return
if self.called:
return
if self.failure:
self.errback(self.failure)
elif self.resultsReceived == len(self.resultList):
self.callback(self.resultList)
def _cbDeferred(self, res, index):
self.resultList[index] = res
self.resultsReceived += 1
self.checkForFinished()
return res
def _ebDeferred(self, f):
self.failure = f
self.checkForFinished()
return f
class Pipeline(object):
"""I manage a size-limited pipeline of Deferred operations, usually
callRemote() messages."""
def __init__(self, capacity):
self.capacity = capacity # how full we can be
self.gauge = 0 # how full we are
self.failure = None
self.waiting = [] # callers of add() who are blocked
self.unflushed = ExpandableDeferredList()
def add(self, _size, _func, *args, **kwargs):
# We promise that all the Deferreds we return will fire in the order
# they were returned. To make it easier to keep this promise, we
# prohibit multiple outstanding calls to add() .
if self.waiting:
raise SingleFileError
if self.failure:
return defer.fail(self.failure)
self.gauge += _size
fd = defer.maybeDeferred(_func, *args, **kwargs)
fd.addBoth(self._call_finished, _size)
self.unflushed.addDeferred(fd)
fd.addErrback(self._eat_pipeline_errors)
fd.addErrback(log.err, "_eat_pipeline_errors didn't eat it")
if self.gauge < self.capacity:
return defer.succeed(None)
d = defer.Deferred()
self.waiting.append(d)
return d
def flush(self):
if self.failure:
return defer.fail(self.failure)
d, self.unflushed = self.unflushed, ExpandableDeferredList()
d.close()
d.addErrback(self._flushed_error)
return d
def _flushed_error(self, f):
precondition(self.failure) # should have been set by _call_finished
return self.failure
def _call_finished(self, res, size):
self.gauge -= size
if isinstance(res, Failure):
res = Failure(PipelineError(res))
if not self.failure:
self.failure = res
if self.failure:
while self.waiting:
d = self.waiting.pop(0)
d.errback(self.failure)
else:
while self.waiting and (self.gauge < self.capacity):
d = self.waiting.pop(0)
d.callback(None)
# the d.callback() might trigger a new call to add(), which
# will raise our gauge and might cause the pipeline to be
# filled. So the while() loop gets a chance to tell the
# caller to stop.
return res
def _eat_pipeline_errors(self, f):
f.trap(PipelineError)
return None


@@ -86,7 +86,6 @@ commands =
    coverage: python -b -m coverage run -m twisted.trial {env:TAHOE_LAFS_TRIAL_ARGS:--rterrors --reporter=timing} {posargs:{env:TEST_SUITE}}
    coverage: coverage combine
    coverage: coverage xml
-   coverage: coverage report

[testenv:integration]
basepython = python3

@@ -99,7 +98,6 @@ commands =
    # NOTE: 'run with "py.test --keep-tempdir -s -v integration/" to debug failures'
    py.test --timeout=1800 --coverage -s -v {posargs:integration}
    coverage combine
-   coverage report

[testenv:codechecks]