Mirror of https://github.com/tahoe-lafs/tahoe-lafs.git (synced 2024-12-24 07:06:41 +00:00)

Merge remote-tracking branch 'origin/master' into 3952-benchmarks

This commit is contained in:
commit 22843c89a5
@@ -94,6 +94,9 @@ workflows:
          {}
      - "integration":
          # Run even the slow integration tests here. We need the `--` to
          # sneak past tox and get to pytest.
          tox-args: "-- --runslow integration"
          requires:
            # If the unit test suite doesn't pass, don't bother running the
            # integration tests.
@@ -294,6 +297,14 @@ jobs:
  integration:
    <<: *DEBIAN

    parameters:
      tox-args:
        description: >-
          Additional arguments to pass to the tox command.
        type: "string"
        default: ""

    docker:
      - <<: *DOCKERHUB_AUTH
        image: "tahoelafsci/debian:11-py3.9"

@@ -306,6 +317,9 @@ jobs:
      # Disable artifact collection because py.test can't produce any.
      ARTIFACTS_OUTPUT_PATH: ""

      # Pass on anything we got in our parameters.
      TAHOE_LAFS_TOX_ARGS: "<< parameters.tox-args >>"

    steps:
      - "checkout"
      # DRY, YAML-style. See the debian-9 steps.
@@ -45,14 +45,15 @@ fi

# A prefix for the test command that ensure it will exit after no more than a
# certain amount of time. Ideally, we would only enforce a "silent" period
-# timeout but there isn't obviously a ready-made tool for that. The test
-# suite only takes about 5 - 6 minutes on CircleCI right now. 15 minutes
-# seems like a moderately safe window.
+# timeout but there isn't obviously a ready-made tool for that. The unit test
+# suite only takes about 5 - 6 minutes on CircleCI right now. The integration
+# tests are a bit longer than that. 45 minutes seems like a moderately safe
+# window.
#
# This is primarily aimed at catching hangs on the PyPy job which runs for
# about 21 minutes and then gets killed by CircleCI in a way that fails the
# job and bypasses our "allowed failure" logic.
-TIMEOUT="timeout --kill-after 1m 25m"
+TIMEOUT="timeout --kill-after 1m 45m"

# Run the test suite as a non-root user. This is the expected usage some
# small areas of the test suite assume non-root privileges (such as unreadable
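For readers unfamiliar with coreutils `timeout`, the prefix above sends SIGTERM after 45 minutes and follows up with SIGKILL one minute later if the command is still alive. A rough Python sketch of the same behaviour (the tox command line here is illustrative, not the exact one the CI script builds):

import subprocess

# Illustrative only: approximate `timeout --kill-after 1m 45m <command>`.
proc = subprocess.Popen(["tox", "-e", "integration", "--", "--runslow", "integration"])
try:
    proc.wait(timeout=45 * 60)           # give the suite 45 minutes
except subprocess.TimeoutExpired:
    proc.terminate()                     # SIGTERM first, like `timeout` does
    try:
        proc.wait(timeout=60)            # the --kill-after grace period
    except subprocess.TimeoutExpired:
        proc.kill()                      # SIGKILL if it still will not exit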
@@ -40,7 +40,6 @@ from .util import (
    await_client_ready,
    TahoeProcess,
    cli,
-   _run_node,
    generate_ssh_key,
    block_with_timeout,
)

@@ -63,6 +62,22 @@ def pytest_addoption(parser):
        help=("If set, force Foolscap only for the storage protocol. " +
              "Otherwise HTTP will be used.")
    )
    parser.addoption(
        "--runslow", action="store_true", default=False,
        dest="runslow",
        help="If set, run tests marked as slow.",
    )

def pytest_collection_modifyitems(session, config, items):
    if not config.option.runslow:
        # The --runslow option was not given; keep only collected items not
        # marked as slow.
        items[:] = [
            item
            for item
            in items
            if item.get_closest_marker("slow") is None
        ]


@pytest.fixture(autouse=True, scope='session')
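Tests opt in to this filtering with the `slow` marker registered in the new pytest.ini (shown further down). A minimal sketch of a hypothetical test module that is only collected when `--runslow` is passed:

import pytest

@pytest.mark.slow
def test_something_expensive():
    # Collected only when the suite is invoked with --runslow,
    # e.g. `pytest --runslow integration`.
    assert 1 + 1 == 2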
@@ -408,10 +423,9 @@ alice-key ssh-rsa {ssh_public_key} {rwcap}
""".format(rwcap=rwcap, ssh_public_key=ssh_public_key))

    # 4. Restart the node with new SFTP config.
-   process.kill()
-   pytest_twisted.blockon(_run_node(reactor, process.node_dir, request, None))
+   pytest_twisted.blockon(process.restart_async(reactor, request))
    await_client_ready(process)
    print(f"Alice pid: {process.transport.pid}")
    return process
integration/test_vectors.py (new file, 119 lines)
@@ -0,0 +1,119 @@
"""
Verify certain results against test vectors with well-known results.
"""

from __future__ import annotations

from functools import partial
from typing import AsyncGenerator, Iterator
from itertools import starmap, product

from attrs import evolve

from pytest import mark
from pytest_twisted import ensureDeferred

from . import vectors
from .vectors import parameters
from .util import reconfigure, upload, TahoeProcess

@mark.parametrize('convergence', parameters.CONVERGENCE_SECRETS)
def test_convergence(convergence):
    """
    Convergence secrets are 16 bytes.
    """
    assert isinstance(convergence, bytes), "Convergence secret must be bytes"
    assert len(convergence) == 16, "Convergence secret must be 16 bytes"


@mark.slow
@mark.parametrize('case,expected', vectors.capabilities.items())
@ensureDeferred
async def test_capability(reactor, request, alice, case, expected):
    """
    The capability that results from uploading certain well-known data
    with certain well-known parameters results in exactly the previously
    computed value.
    """
    # rewrite alice's config to match params and convergence
    await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)

    # upload data in the correct format
    actual = upload(alice, case.fmt, case.data)

    # compare the resulting cap to the expected result
    assert actual == expected


@ensureDeferred
async def skiptest_generate(reactor, request, alice):
    """
    This is a helper for generating the test vectors.

    You can re-generate the test vectors by fixing the name of the test and
    running it. Normally this test doesn't run because it ran once and we
    captured its output. Other tests run against that output and we want them
    to run against the results produced originally, not a possibly
    ever-changing set of outputs.
    """
    space = starmap(
        # segment_size could be a parameter someday but it's not easy to vary
        # using the Python implementation so it isn't one for now.
        partial(vectors.Case, segment_size=parameters.SEGMENT_SIZE),
        product(
            parameters.ZFEC_PARAMS,
            parameters.CONVERGENCE_SECRETS,
            parameters.OBJECT_DESCRIPTIONS,
            parameters.FORMATS,
        ),
    )
    iterresults = generate(reactor, request, alice, space)

    results = []
    async for result in iterresults:
        # Accumulate the new result
        results.append(result)
        # Then rewrite the whole output file with the new accumulator value.
        # This means that if we fail partway through, we will still have
        # recorded partial results -- instead of losing them all.
        vectors.save_capabilities(results)

async def generate(
        reactor,
        request,
        alice: TahoeProcess,
        cases: Iterator[vectors.Case],
) -> AsyncGenerator[tuple[vectors.Case, str], None]:
    """
    Generate all of the test vectors using the given node.

    :param reactor: The reactor to use to restart the Tahoe-LAFS node when it
        needs to be reconfigured.

    :param request: The pytest request object to use to arrange process
        cleanup.

    :param format: The name of the encryption/data format to use.

    :param alice: The Tahoe-LAFS node to use to generate the test vectors.

    :param case: The inputs for which to generate a value.

    :return: The capability for the case.
    """
    # Share placement doesn't affect the resulting capability. For maximum
    # reliability of this generator, be happy if we can put shares anywhere
    happy = 1
    for case in cases:
        await reconfigure(
            reactor,
            request,
            alice,
            (happy, case.params.required, case.params.total),
            case.convergence
        )

        # Give the format a chance to make an RSA key if it needs it.
        case = evolve(case, fmt=case.fmt.customize())
        cap = upload(alice, case.fmt, case.data)
        yield case, cap
@@ -7,18 +7,9 @@ Most of the tests have cursory asserts and encode 'what the WebAPI did
at the time of testing' -- not necessarily a cohesive idea of what the
WebAPI *should* do in every situation. It's not clear the latter
exists anywhere, however.

-Ported to Python 3.
"""

-from __future__ import unicode_literals
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from future.utils import PY2
-if PY2:
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+from __future__ import annotations

import time
from urllib.parse import unquote as url_unquote, quote as url_quote

@@ -32,6 +23,7 @@ import requests
import html5lib
from bs4 import BeautifulSoup

+from pytest_twisted import ensureDeferred

def test_index(alice):
    """

@@ -252,10 +244,18 @@ def test_status(alice):
    assert found_download, "Failed to find the file we downloaded in the status-page"


-def test_directory_deep_check(alice):
+@ensureDeferred
+async def test_directory_deep_check(reactor, request, alice):
    """
    use deep-check and confirm the result pages work
    """
    # Make sure the node is configured compatibly with expectations of this
    # test.
    happy = 3
    required = 2
    total = 4

    await util.reconfigure(reactor, request, alice, (happy, required, total), convergence=None)

    # create a directory
    resp = requests.post(

@@ -313,7 +313,7 @@ def test_directory_deep_check(alice):
    )

    def check_repair_data(checkdata):
-       assert checkdata["healthy"] is True
+       assert checkdata["healthy"]
        assert checkdata["count-happiness"] == 4
        assert checkdata["count-good-share-hosts"] == 4
        assert checkdata["count-shares-good"] == 4
@@ -1,14 +1,19 @@
"""
-Ported to Python 3.
+General functionality useful for the implementation of integration tests.
"""

from __future__ import annotations

from contextlib import contextmanager
from typing import Any
from typing_extensions import Literal
from tempfile import NamedTemporaryFile
import sys
import time
import json
from os import mkdir, environ
from os.path import exists, join
from io import StringIO, BytesIO
from functools import partial
from subprocess import check_output

from twisted.python.filepath import (

@@ -18,12 +23,23 @@ from twisted.internet.defer import Deferred, succeed
from twisted.internet.protocol import ProcessProtocol
from twisted.internet.error import ProcessExitedAlready, ProcessDone
from twisted.internet.threads import deferToThread
from twisted.internet.interfaces import IProcessTransport, IReactorProcess

from attrs import frozen, evolve
import requests

from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.serialization import (
    Encoding,
    PrivateFormat,
    NoEncryption,
)

from paramiko.rsakey import RSAKey
from boltons.funcutils import wraps

from allmydata.util import base32
from allmydata.util.configutil import (
    get_config,
    set_config,
@@ -134,9 +150,40 @@ class _MagicTextProtocol(ProcessProtocol):
        sys.stdout.write(data)


-def _cleanup_tahoe_process(tahoe_transport, exited):
+def _cleanup_process_async(transport: IProcessTransport, allow_missing: bool) -> None:
    """
-   Terminate the given process with a kill signal (SIGKILL on POSIX,
+   If the given process transport seems to still be associated with a
+   running process, send a SIGTERM to that process.

    :param transport: The transport to use.

    :param allow_missing: If ``True`` then it is not an error for the
        transport to have no associated process. Otherwise, an exception will
        be raised in that case.

    :raise: ``ValueError`` if ``allow_missing`` is ``False`` and the transport
        has no process.
    """
    if transport.pid is None:
        if allow_missing:
            print("Process already cleaned up and that's okay.")
            return
        else:
            raise ValueError("Process is not running")
    print("signaling {} with TERM".format(transport.pid))
    try:
        transport.signalProcess('TERM')
    except ProcessExitedAlready:
        # The transport object thought it still had a process but the real OS
        # process has already exited. That's fine. We accomplished what we
        # wanted to. We don't care about ``allow_missing`` here because
        # there's no way we could have known the real OS process already
        # exited.
        pass

def _cleanup_tahoe_process(tahoe_transport, exited, allow_missing=False):
    """
    Terminate the given process with a kill signal (SIGTERM on POSIX,
    TerminateProcess on Windows).

    :param tahoe_transport: The `IProcessTransport` representing the process.

@@ -145,14 +192,10 @@ def _cleanup_tahoe_process(tahoe_transport, exited):
    :return: After the process has exited.
    """
    from twisted.internet import reactor
-   try:
-       print("signaling {} with TERM".format(tahoe_transport.pid))
-       tahoe_transport.signalProcess('TERM')
-       print("signaled, blocking on exit")
-       block_with_timeout(exited, reactor)
-       print("exited, goodbye")
-   except ProcessExitedAlready:
-       pass
+   _cleanup_process_async(tahoe_transport, allow_missing=allow_missing)
+   print("signaled, blocking on exit")
+   block_with_timeout(exited, reactor)
+   print("exited, goodbye")


def _tahoe_runner_optional_coverage(proto, reactor, request, other_args):
@@ -199,8 +242,33 @@ class TahoeProcess(object):

    def kill(self):
        """Kill the process, block until it's done."""
        print(f"TahoeProcess.kill({self.transport.pid} / {self.node_dir})")
        _cleanup_tahoe_process(self.transport, self.transport.exited)

    def kill_async(self):
        """
        Kill the process, return a Deferred that fires when it's done.
        """
        print(f"TahoeProcess.kill_async({self.transport.pid} / {self.node_dir})")
        _cleanup_process_async(self.transport, allow_missing=False)
        return self.transport.exited

    def restart_async(self, reactor: IReactorProcess, request: Any) -> Deferred:
        """
        Stop and then re-start the associated process.

        :return: A Deferred that fires after the new process is ready to
            handle requests.
        """
        d = self.kill_async()
        d.addCallback(lambda ignored: _run_node(reactor, self.node_dir, request, None, finalize=False))
        def got_new_process(proc):
            # Grab the new transport since the one we had before is no longer
            # valid after the stop/start cycle.
            self._process_transport = proc.transport
        d.addCallback(got_new_process)
        return d

    def __str__(self):
        return "<TahoeProcess in '{}'>".format(self._node_dir)
@@ -229,19 +297,17 @@ def _run_node(reactor, node_dir, request, magic_text, finalize=True):
    )
    transport.exited = protocol.exited

    tahoe_process = TahoeProcess(
        transport,
        node_dir,
    )

    if finalize:
-       request.addfinalizer(partial(_cleanup_tahoe_process, transport, protocol.exited))
+       request.addfinalizer(tahoe_process.kill)

    # XXX abusing the Deferred; should use .when_magic_seen() pattern

    def got_proto(proto):
        transport._protocol = proto
-       return TahoeProcess(
-           transport,
-           node_dir,
-       )
    protocol.magic_seen.addCallback(got_proto)
-   return protocol.magic_seen
+   d = protocol.magic_seen
+   d.addCallback(lambda ignored: tahoe_process)
+   return d


def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, name, web_port,
@@ -572,3 +638,158 @@ def run_in_thread(f):
    def test(*args, **kwargs):
        return deferToThread(lambda: f(*args, **kwargs))
    return test

@frozen
class CHK:
    """
    Represent the CHK encoding sufficiently to run a ``tahoe put`` command
    using it.
    """
    kind = "chk"
    max_shares = 256

    def customize(self) -> CHK:
        # Nothing to do.
        return self

    @classmethod
    def load(cls, params: None) -> CHK:
        assert params is None
        return cls()

    def to_json(self) -> None:
        return None

    @contextmanager
    def to_argv(self) -> None:
        yield []

@frozen
class SSK:
    """
    Represent the SSK encodings (SDMF and MDMF) sufficiently to run a
    ``tahoe put`` command using one of them.
    """
    kind = "ssk"

    # SDMF and MDMF encode share counts (N and k) into the share itself as an
    # unsigned byte. They could have encoded (share count - 1) to fit the
    # full range supported by ZFEC into the unsigned byte - but they don't.
    # So 256 is inaccessible to those formats and we set the upper bound at
    # 255.
    max_shares = 255

    name: Literal["sdmf", "mdmf"]
    key: None | bytes

    @classmethod
    def load(cls, params: dict) -> SSK:
        assert params.keys() == {"format", "mutable", "key"}
        return cls(params["format"], params["key"].encode("ascii"))

    def customize(self) -> SSK:
        """
        Return an SSK with a newly generated random RSA key.
        """
        return evolve(self, key=generate_rsa_key())

    def to_json(self) -> dict[str, str]:
        return {
            "format": self.name,
            "mutable": None,
            "key": self.key.decode("ascii"),
        }

    @contextmanager
    def to_argv(self) -> None:
        with NamedTemporaryFile() as f:
            f.write(self.key)
            f.flush()
            yield [f"--format={self.name}", "--mutable", f"--private-key-path={f.name}"]


def upload(alice: TahoeProcess, fmt: CHK | SSK, data: bytes) -> str:
    """
    Upload the given data to the given node.

    :param alice: The node to upload to.

    :param fmt: The name of the format for the upload. CHK, SDMF, or MDMF.

    :param data: The data to upload.

    :return: The capability for the uploaded data.
    """

    with NamedTemporaryFile() as f:
        f.write(data)
        f.flush()
        with fmt.to_argv() as fmt_argv:
            argv = [alice, "put"] + fmt_argv + [f.name]
            return cli(*argv).decode("utf-8").strip()


async def reconfigure(reactor, request, node: TahoeProcess, params: tuple[int, int, int], convergence: None | bytes) -> None:
    """
    Reconfigure a Tahoe-LAFS node with different ZFEC parameters and
    convergence secret.

    If the current configuration is different from the specified
    configuration, the node will be restarted so it takes effect.

    :param reactor: A reactor to use to restart the process.
    :param request: The pytest request object to use to arrange process
        cleanup.
    :param node: The Tahoe-LAFS node to reconfigure.
    :param params: The ``happy``, ``needed``, and ``total`` ZFEC encoding
        parameters.
    :param convergence: If given, the convergence secret. If not given, the
        existing convergence secret will be left alone.

    :return: ``None`` after the node configuration has been rewritten, the
        node has been restarted, and the node is ready to provide service.
    """
    happy, needed, total = params
    config = node.get_config()

    changed = False
    cur_happy = int(config.get_config("client", "shares.happy"))
    cur_needed = int(config.get_config("client", "shares.needed"))
    cur_total = int(config.get_config("client", "shares.total"))

    if (happy, needed, total) != (cur_happy, cur_needed, cur_total):
        changed = True
        config.set_config("client", "shares.happy", str(happy))
        config.set_config("client", "shares.needed", str(needed))
        config.set_config("client", "shares.total", str(total))

    if convergence is not None:
        cur_convergence = config.get_private_config("convergence").encode("ascii")
        if base32.a2b(cur_convergence) != convergence:
            changed = True
            config.write_private_config("convergence", base32.b2a(convergence))

    if changed:
        # restart the node
        print(f"Restarting {node.node_dir} for ZFEC reconfiguration")
        await node.restart_async(reactor, request)
        print("Restarted. Waiting for ready state.")
        await_client_ready(node)
        print("Ready.")
    else:
        print("Config unchanged, not restarting.")


def generate_rsa_key() -> bytes:
    """
    Generate a 2048 bit RSA key suitable for use with SSKs.
    """
    return rsa.generate_private_key(
        public_exponent=65537,
        key_size=2048,
        backend=default_backend()
    ).private_bytes(
        encoding=Encoding.PEM,
        format=PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=NoEncryption(),
    )
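A quick usage sketch of the helpers above (hypothetical values; `alice` stands for the running TahoeProcess fixture used throughout these tests):

# Hypothetical usage of the new helpers against a running node fixture `alice`.
# CHK needs no per-upload key; SSK formats get a fresh RSA key via customize().
chk_cap = upload(alice, CHK(), b"some immutable bytes")
mdmf_cap = upload(alice, SSK(name="mdmf", key=None).customize(), b"some mutable bytes")

# Both calls return the capability strings printed by `tahoe put`.
print(chk_cap, mdmf_cap)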
integration/vectors/__init__.py (new file, 30 lines)
@@ -0,0 +1,30 @@
__all__ = [
    "DATA_PATH",
    "CURRENT_VERSION",
    "MAX_SHARES",

    "Case",
    "Sample",
    "SeedParam",
    "encode_bytes",
    "save_capabilities",

    "capabilities",
]

from .vectors import (
    DATA_PATH,
    CURRENT_VERSION,

    Case,
    Sample,
    SeedParam,
    encode_bytes,
    save_capabilities,

    capabilities,
)

from .parameters import (
    MAX_SHARES,
)
integration/vectors/model.py (new file, 58 lines)
@@ -0,0 +1,58 @@
"""
Simple data type definitions useful in the definition/verification of test
vectors.
"""

from __future__ import annotations

from attrs import frozen

# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"

@frozen
class Sample:
    """
    Some instructions for building a long byte string.

    :ivar seed: Some bytes to repeat some times to produce the string.
    :ivar length: The length of the desired byte string.
    """
    seed: bytes
    length: int

@frozen
class Param:
    """
    Some ZFEC parameters.
    """
    required: int
    total: int

@frozen
class SeedParam:
    """
    Some ZFEC parameters, almost.

    :ivar required: The number of required shares.

    :ivar total: Either the number of total shares or the constant
        ``MAX_SHARES`` to indicate that the total number of shares should be
        the maximum number supported by the object format.
    """
    required: int
    total: int | str

    def realize(self, max_total: int) -> Param:
        """
        Create a ``Param`` from this object's values, possibly
        substituting the given real value for total if necessary.

        :param max_total: The value to use to replace ``MAX_SHARES`` if
            necessary.
        """
        if self.total == MAX_SHARES:
            return Param(self.required, max_total)
        return Param(self.required, self.total)
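For illustration, the MAX_SHARES substitution behaves like this (using the SSK upper bound of 255 shares; attrs classes compare by value):

# Illustrative check of SeedParam.realize():
assert SeedParam(101, MAX_SHARES).realize(255) == Param(101, 255)
assert SeedParam(3, 10).realize(255) == Param(3, 10)   # a concrete total passes through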
integration/vectors/parameters.py (new file, 93 lines)
@@ -0,0 +1,93 @@
"""
Define input parameters for test vector generation.

:ivar CONVERGENCE_SECRETS: Convergence secrets.

:ivar SEGMENT_SIZE: The single segment size that the Python implementation
    currently supports without a lot of refactoring.

:ivar OBJECT_DESCRIPTIONS: Small objects with instructions which can be
    expanded into a possibly large byte string. These are intended to be used
    as plaintext inputs.

:ivar ZFEC_PARAMS: Input parameters to ZFEC.

:ivar FORMATS: Encoding/encryption formats.
"""

from __future__ import annotations

from hashlib import sha256

from .model import MAX_SHARES
from .vectors import Sample, SeedParam
from ..util import CHK, SSK

def digest(bs: bytes) -> bytes:
    """
    Digest bytes to bytes.
    """
    return sha256(bs).digest()


def hexdigest(bs: bytes) -> str:
    """
    Digest bytes to text.
    """
    return sha256(bs).hexdigest()

# Just a couple convergence secrets. The only thing we do with this value is
# feed it into a tagged hash. It certainly makes a difference to the output
# but the hash should destroy any structure in the input so it doesn't seem
# like there's a reason to test a lot of different values.
CONVERGENCE_SECRETS: list[bytes] = [
    b"aaaaaaaaaaaaaaaa",
    digest(b"Hello world")[:16],
]

SEGMENT_SIZE: int = 128 * 1024

# Exercise at least a handful of different sizes, trying to cover:
#
# 1. Some cases smaller than one "segment" (128k).
#    This covers shrinking of some parameters to match data size.
#    This includes one case of the smallest possible CHK.
#
# 2. Some cases right on the edges of integer segment multiples.
#    Because boundaries are tricky.
#
# 4. Some cases that involve quite a few segments.
#    This exercises merkle tree construction more thoroughly.
#
# See ``stretch`` for construction of the actual test data.
OBJECT_DESCRIPTIONS: list[Sample] = [
    # The smallest possible. 55 bytes and smaller are LIT.
    Sample(b"a", 56),
    Sample(b"a", 1024),
    Sample(b"c", 4096),
    Sample(digest(b"foo"), SEGMENT_SIZE - 1),
    Sample(digest(b"bar"), SEGMENT_SIZE + 1),
    Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
    Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
    Sample(digest(b"bazquux"), SEGMENT_SIZE * 32),
    Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
    Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
]

ZFEC_PARAMS: list[SeedParam] = [
    SeedParam(1, 1),
    SeedParam(1, 3),
    SeedParam(2, 3),
    SeedParam(3, 10),
    SeedParam(71, 255),
    SeedParam(101, MAX_SHARES),
]

FORMATS: list[CHK | SSK] = [
    CHK(),

    # These start out unaware of a key but various keys will be supplied
    # during generation.
    SSK(name="sdmf", key=None),
    SSK(name="mdmf", key=None),
]
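Taken together these lists define the vector space that `skiptest_generate` walks; a quick count, mirroring the `itertools.product` call in integration/test_vectors.py:

from itertools import product

# 6 ZFEC parameter pairs x 2 convergence secrets x 10 object descriptions x 3 formats
cases = list(product(ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, FORMATS))
assert len(cases) == 6 * 2 * 10 * 3   # 360 combinations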
integration/vectors/test_vectors.yaml (new executable file, 18002 lines)
(File diff suppressed because it is too large.)
integration/vectors/vectors.py (new file, 155 lines)
@@ -0,0 +1,155 @@
"""
A module that loads pre-generated test vectors.

:ivar DATA_PATH: The path of the file containing test vectors.

:ivar capabilities: The capability test vectors.
"""

from __future__ import annotations

from typing import TextIO
from attrs import frozen
from yaml import safe_load, safe_dump
from base64 import b64encode, b64decode

from twisted.python.filepath import FilePath

from .model import Param, Sample, SeedParam
from ..util import CHK, SSK

DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")

# The version of the persisted test vector data this code can interpret.
CURRENT_VERSION: str = "2023-01-16.2"

@frozen
class Case:
    """
    Represent one case for which we want/have a test vector.
    """
    seed_params: Param
    convergence: bytes
    seed_data: Sample
    fmt: CHK | SSK
    segment_size: int

    @property
    def data(self):
        return stretch(self.seed_data.seed, self.seed_data.length)

    @property
    def params(self):
        return self.seed_params.realize(self.fmt.max_shares)


def encode_bytes(b: bytes) -> str:
    """
    Base64 encode some bytes to text so they are representable in JSON.
    """
    return b64encode(b).decode("ascii")


def decode_bytes(b: str) -> bytes:
    """
    Base64 decode some text to bytes.
    """
    return b64decode(b.encode("ascii"))


def stretch(seed: bytes, size: int) -> bytes:
    """
    Given a simple description of a byte string, return the byte string
    itself.
    """
    assert isinstance(seed, bytes)
    assert isinstance(size, int)
    assert size > 0
    assert len(seed) > 0

    multiples = size // len(seed) + 1
    return (seed * multiples)[:size]
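For example, `stretch` repeats the seed and truncates to the requested length:

# Illustrative behaviour of stretch():
assert stretch(b"ab", 5) == b"ababa"
assert stretch(b"a", 3) == b"aaa"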
def save_capabilities(results: list[tuple[Case, str]], path: FilePath = DATA_PATH) -> None:
    """
    Save some test vector cases and their expected values.

    This is logically the inverse of ``load_capabilities``.
    """
    path.setContent(safe_dump({
        "version": CURRENT_VERSION,
        "vector": [
            {
                "convergence": encode_bytes(case.convergence),
                "format": {
                    "kind": case.fmt.kind,
                    "params": case.fmt.to_json(),
                },
                "sample": {
                    "seed": encode_bytes(case.seed_data.seed),
                    "length": case.seed_data.length,
                },
                "zfec": {
                    "segmentSize": case.segment_size,
                    "required": case.params.required,
                    "total": case.params.total,
                },
                "expected": cap,
            }
            for (case, cap)
            in results
        ],
    }).encode("ascii"))


def load_format(serialized: dict) -> CHK | SSK:
    """
    Load an encrypted object format from a simple description of it.

    :param serialized: A ``dict`` describing either CHK or SSK, possibly with
        some parameters.
    """
    if serialized["kind"] == "chk":
        return CHK.load(serialized["params"])
    elif serialized["kind"] == "ssk":
        return SSK.load(serialized["params"])
    else:
        raise ValueError(f"Unrecognized format: {serialized}")


def load_capabilities(f: TextIO) -> dict[Case, str]:
    """
    Load some test vector cases and their expected results from the given
    file.

    This is logically the inverse of ``save_capabilities``.
    """
    data = safe_load(f)
    if data is None:
        return {}
    if data["version"] != CURRENT_VERSION:
        print(
            f"Current version is {CURRENT_VERSION}; "
            f"cannot load version {data['version']} data."
        )
        return {}

    return {
        Case(
            seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
            segment_size=case["zfec"]["segmentSize"],
            convergence=decode_bytes(case["convergence"]),
            seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
            fmt=load_format(case["format"]),
        ): case["expected"]
        for case
        in data["vector"]
    }


try:
    with DATA_PATH.open() as f:
        capabilities: dict[Case, str] = load_capabilities(f)
except FileNotFoundError:
    capabilities = {}
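From the serialization code above, a single entry under the top-level "vector" list in test_vectors.yaml has roughly this shape (all values here are invented for illustration; the capability string in particular is a placeholder):

# Hypothetical shape of one serialized vector entry:
example_entry = {
    "convergence": "YWFhYWFhYWFhYWFhYWFhYQ==",   # base64 of the 16-byte secret
    "format": {"kind": "chk", "params": None},
    "sample": {"seed": "YQ==", "length": 1024},   # base64 seed plus target length
    "zfec": {"segmentSize": 131072, "required": 1, "total": 1},
    "expected": "URI:CHK:...",                    # placeholder capability string
}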
newsfragments/3961.other (new file, 1 line)
@@ -0,0 +1 @@
The integration test suite now includes a set of capability test vectors (``integration/vectors/test_vectors.yaml``) which can be used to verify compatibility between Tahoe-LAFS and other implementations.
newsfragments/3967.minor (new file, empty)
@@ -1,14 +1,14 @@
{
    "mach-nix": {
-       "branch": "master",
+       "branch": "switch-to-nix-pypi-fetcher-2",
        "description": "Create highly reproducible python environments",
        "homepage": "",
-       "owner": "davhau",
+       "owner": "PrivateStorageio",
        "repo": "mach-nix",
-       "rev": "bdc97ba6b2ecd045a467b008cff4ae337b6a7a6b",
-       "sha256": "12b3jc0g0ak6s93g3ifvdpwxbyqx276k1kl66bpwz8a67qjbcbwf",
+       "rev": "f6d1a1841d8778c199326f95d0703c16bee2f8c4",
+       "sha256": "0krc4yhnpbzc4yhja9frnmym2vqm5zyacjnqb3fq9z9gav8vs9ls",
        "type": "tarball",
-       "url": "https://github.com/davhau/mach-nix/archive/bdc97ba6b2ecd045a467b008cff4ae337b6a7a6b.tar.gz",
+       "url": "https://github.com/PrivateStorageio/mach-nix/archive/f6d1a1841d8778c199326f95d0703c16bee2f8c4.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    },
    "niv": {

@@ -53,10 +53,10 @@
        "homepage": "",
        "owner": "DavHau",
        "repo": "pypi-deps-db",
-       "rev": "5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6",
-       "sha256": "0pc6mj7rzvmhh303rvj5wf4hrksm4h2rf4fsvqs0ljjdmgxrqm3f",
+       "rev": "5440c9c76f6431f300fb6a1ecae762a5444de5f6",
+       "sha256": "08r3iiaxzw9v2gq15y1m9bwajshyyz9280g6aia7mkgnjs9hnd1n",
        "type": "tarball",
-       "url": "https://github.com/DavHau/pypi-deps-db/archive/5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6.tar.gz",
+       "url": "https://github.com/DavHau/pypi-deps-db/archive/5440c9c76f6431f300fb6a1ecae762a5444de5f6.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    }
}
pytest.ini (new file, 3 lines)
@@ -0,0 +1,3 @@
[pytest]
markers =
    slow: marks tests as slow (not run by default; run them with '--runslow')
setup.py
@@ -139,11 +139,10 @@ install_requires = [
    "werkzeug != 2.2.0",
    "treq",
    "cbor2",
-   # Ideally we want 0.4+ to be able to pass in mmap(), but it's not strictly
-   # necessary yet until we fix the workaround to
-   # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3963 in
-   # allmydata.storage.http_server.
-   "pycddl",

+   # 0.4 adds the ability to pass in mmap() values which greatly reduces the
+   # amount of copying involved.
+   "pycddl >= 0.4",

    # for pid-file support
    "psutil",
@@ -11,7 +11,6 @@ import binascii
from tempfile import TemporaryFile
from os import SEEK_END, SEEK_SET
import mmap
-from importlib.metadata import version as get_package_version, PackageNotFoundError

from cryptography.x509 import Certificate as CryptoCertificate
from zope.interface import implementer

@@ -60,20 +59,6 @@ from ..util.base32 import rfc3548_alphabet
from allmydata.interfaces import BadWriteEnablerError


-# Until we figure out Nix (https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3963),
-# need to support old pycddl which can only take bytes:
-from distutils.version import LooseVersion
-
-try:
-    PYCDDL_BYTES_ONLY = LooseVersion(get_package_version("pycddl")) < LooseVersion(
-        "0.4"
-    )
-except PackageNotFoundError:
-    # This can happen when building PyInstaller distribution. We'll just assume
-    # you installed a modern pycddl, cause why wouldn't you?
-    PYCDDL_BYTES_ONLY = False


class ClientSecretsException(Exception):
    """The client did not send the appropriate secrets."""

@@ -572,7 +557,7 @@ class HTTPServer(object):
            fd = request.content.fileno()
        except (ValueError, OSError):
            fd = -1
-       if fd >= 0 and not PYCDDL_BYTES_ONLY:
+       if fd >= 0:
            # It's a file, so we can use mmap() to save memory.
            message = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
        else:
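The surviving branch relies on pycddl >= 0.4 accepting an mmap object directly; the `else:` branch is cut off by the diff, so its exact contents are not shown here. A hedged sketch of the general pattern, independent of the Tahoe-LAFS code:

import mmap

def request_body_view(content):
    # Prefer a zero-copy mmap of the request body when it is backed by a real
    # file; otherwise fall back to reading the bytes into memory (the fallback
    # is an assumption about the elided branch, not a copy of it).
    try:
        fd = content.fileno()
    except (ValueError, OSError):
        fd = -1
    if fd >= 0:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    return content.read()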