Merge remote-tracking branch 'origin/master' into 3952-benchmarks

This commit is contained in:
Itamar Turner-Trauring 2023-01-25 10:56:36 -05:00
commit 22843c89a5
17 changed files with 18767 additions and 72 deletions

View File

@ -94,6 +94,9 @@ workflows:
{} {}
- "integration": - "integration":
# Run even the slow integration tests here. We need the `--` to
# sneak past tox and get to pytest.
tox-args: "-- --runslow integration"
requires: requires:
# If the unit test suite doesn't pass, don't bother running the # If the unit test suite doesn't pass, don't bother running the
# integration tests. # integration tests.
@ -294,6 +297,14 @@ jobs:
integration: integration:
<<: *DEBIAN <<: *DEBIAN
parameters:
tox-args:
description: >-
Additional arguments to pass to the tox command.
type: "string"
default: ""
docker: docker:
- <<: *DOCKERHUB_AUTH - <<: *DOCKERHUB_AUTH
image: "tahoelafsci/debian:11-py3.9" image: "tahoelafsci/debian:11-py3.9"
@ -306,6 +317,9 @@ jobs:
# Disable artifact collection because py.test can't produce any. # Disable artifact collection because py.test can't produce any.
ARTIFACTS_OUTPUT_PATH: "" ARTIFACTS_OUTPUT_PATH: ""
# Pass on anything we got in our parameters.
TAHOE_LAFS_TOX_ARGS: "<< parameters.tox-args >>"
steps: steps:
- "checkout" - "checkout"
# DRY, YAML-style. See the debian-9 steps. # DRY, YAML-style. See the debian-9 steps.

View File

@ -45,14 +45,15 @@ fi
# A prefix for the test command that ensure it will exit after no more than a # A prefix for the test command that ensure it will exit after no more than a
# certain amount of time. Ideally, we would only enforce a "silent" period # certain amount of time. Ideally, we would only enforce a "silent" period
# timeout but there isn't obviously a ready-made tool for that. The test # timeout but there isn't obviously a ready-made tool for that. The unit test
# suite only takes about 5 - 6 minutes on CircleCI right now. 15 minutes # suite only takes about 5 - 6 minutes on CircleCI right now. The integration
# seems like a moderately safe window. # tests are a bit longer than that. 45 minutes seems like a moderately safe
# window.
# #
# This is primarily aimed at catching hangs on the PyPy job which runs for # This is primarily aimed at catching hangs on the PyPy job which runs for
# about 21 minutes and then gets killed by CircleCI in a way that fails the # about 21 minutes and then gets killed by CircleCI in a way that fails the
# job and bypasses our "allowed failure" logic. # job and bypasses our "allowed failure" logic.
TIMEOUT="timeout --kill-after 1m 25m" TIMEOUT="timeout --kill-after 1m 45m"
# Run the test suite as a non-root user. This is the expected usage some # Run the test suite as a non-root user. This is the expected usage some
# small areas of the test suite assume non-root privileges (such as unreadable # small areas of the test suite assume non-root privileges (such as unreadable

View File

@ -40,7 +40,6 @@ from .util import (
await_client_ready, await_client_ready,
TahoeProcess, TahoeProcess,
cli, cli,
_run_node,
generate_ssh_key, generate_ssh_key,
block_with_timeout, block_with_timeout,
) )
@ -63,6 +62,22 @@ def pytest_addoption(parser):
help=("If set, force Foolscap only for the storage protocol. " + help=("If set, force Foolscap only for the storage protocol. " +
"Otherwise HTTP will be used.") "Otherwise HTTP will be used.")
) )
parser.addoption(
"--runslow", action="store_true", default=False,
dest="runslow",
help="If set, run tests marked as slow.",
)
def pytest_collection_modifyitems(session, config, items):
if not config.option.runslow:
# The --runslow option was not given; keep only collected items not
# marked as slow.
items[:] = [
item
for item
in items
if item.get_closest_marker("slow") is None
]
@pytest.fixture(autouse=True, scope='session') @pytest.fixture(autouse=True, scope='session')
@ -408,10 +423,9 @@ alice-key ssh-rsa {ssh_public_key} {rwcap}
""".format(rwcap=rwcap, ssh_public_key=ssh_public_key)) """.format(rwcap=rwcap, ssh_public_key=ssh_public_key))
# 4. Restart the node with new SFTP config. # 4. Restart the node with new SFTP config.
process.kill() pytest_twisted.blockon(process.restart_async(reactor, request))
pytest_twisted.blockon(_run_node(reactor, process.node_dir, request, None))
await_client_ready(process) await_client_ready(process)
print(f"Alice pid: {process.transport.pid}")
return process return process

119
integration/test_vectors.py Normal file
View File

@ -0,0 +1,119 @@
"""
Verify certain results against test vectors with well-known results.
"""
from __future__ import annotations
from functools import partial
from typing import AsyncGenerator, Iterator
from itertools import starmap, product
from attrs import evolve
from pytest import mark
from pytest_twisted import ensureDeferred
from . import vectors
from .vectors import parameters
from .util import reconfigure, upload, TahoeProcess
@mark.parametrize('convergence', parameters.CONVERGENCE_SECRETS)
def test_convergence(convergence):
"""
Convergence secrets are 16 bytes.
"""
assert isinstance(convergence, bytes), "Convergence secret must be bytes"
assert len(convergence) == 16, "Convergence secret must by 16 bytes"
@mark.slow
@mark.parametrize('case,expected', vectors.capabilities.items())
@ensureDeferred
async def test_capability(reactor, request, alice, case, expected):
"""
The capability that results from uploading certain well-known data
with certain well-known parameters results in exactly the previously
computed value.
"""
# rewrite alice's config to match params and convergence
await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)
# upload data in the correct format
actual = upload(alice, case.fmt, case.data)
# compare the resulting cap to the expected result
assert actual == expected
@ensureDeferred
async def skiptest_generate(reactor, request, alice):
"""
This is a helper for generating the test vectors.
You can re-generate the test vectors by fixing the name of the test and
running it. Normally this test doesn't run because it ran once and we
captured its output. Other tests run against that output and we want them
to run against the results produced originally, not a possibly
ever-changing set of outputs.
"""
space = starmap(
# segment_size could be a parameter someday but it's not easy to vary
# using the Python implementation so it isn't one for now.
partial(vectors.Case, segment_size=parameters.SEGMENT_SIZE),
product(
parameters.ZFEC_PARAMS,
parameters.CONVERGENCE_SECRETS,
parameters.OBJECT_DESCRIPTIONS,
parameters.FORMATS,
),
)
iterresults = generate(reactor, request, alice, space)
results = []
async for result in iterresults:
# Accumulate the new result
results.append(result)
# Then rewrite the whole output file with the new accumulator value.
# This means that if we fail partway through, we will still have
# recorded partial results -- instead of losing them all.
vectors.save_capabilities(results)
async def generate(
reactor,
request,
alice: TahoeProcess,
cases: Iterator[vectors.Case],
) -> AsyncGenerator[[vectors.Case, str], None]:
"""
Generate all of the test vectors using the given node.
:param reactor: The reactor to use to restart the Tahoe-LAFS node when it
needs to be reconfigured.
:param request: The pytest request object to use to arrange process
cleanup.
:param format: The name of the encryption/data format to use.
:param alice: The Tahoe-LAFS node to use to generate the test vectors.
:param case: The inputs for which to generate a value.
:return: The capability for the case.
"""
# Share placement doesn't affect the resulting capability. For maximum
# reliability of this generator, be happy if we can put shares anywhere
happy = 1
for case in cases:
await reconfigure(
reactor,
request,
alice,
(happy, case.params.required, case.params.total),
case.convergence
)
# Give the format a chance to make an RSA key if it needs it.
case = evolve(case, fmt=case.fmt.customize())
cap = upload(alice, case.fmt, case.data)
yield case, cap

View File

@ -7,18 +7,9 @@ Most of the tests have cursory asserts and encode 'what the WebAPI did
at the time of testing' -- not necessarily a cohesive idea of what the at the time of testing' -- not necessarily a cohesive idea of what the
WebAPI *should* do in every situation. It's not clear the latter WebAPI *should* do in every situation. It's not clear the latter
exists anywhere, however. exists anywhere, however.
Ported to Python 3.
""" """
from __future__ import unicode_literals from __future__ import annotations
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import time import time
from urllib.parse import unquote as url_unquote, quote as url_quote from urllib.parse import unquote as url_unquote, quote as url_quote
@ -32,6 +23,7 @@ import requests
import html5lib import html5lib
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from pytest_twisted import ensureDeferred
def test_index(alice): def test_index(alice):
""" """
@ -252,10 +244,18 @@ def test_status(alice):
assert found_download, "Failed to find the file we downloaded in the status-page" assert found_download, "Failed to find the file we downloaded in the status-page"
def test_directory_deep_check(alice): @ensureDeferred
async def test_directory_deep_check(reactor, request, alice):
""" """
use deep-check and confirm the result pages work use deep-check and confirm the result pages work
""" """
# Make sure the node is configured compatibly with expectations of this
# test.
happy = 3
required = 2
total = 4
await util.reconfigure(reactor, request, alice, (happy, required, total), convergence=None)
# create a directory # create a directory
resp = requests.post( resp = requests.post(
@ -313,7 +313,7 @@ def test_directory_deep_check(alice):
) )
def check_repair_data(checkdata): def check_repair_data(checkdata):
assert checkdata["healthy"] is True assert checkdata["healthy"]
assert checkdata["count-happiness"] == 4 assert checkdata["count-happiness"] == 4
assert checkdata["count-good-share-hosts"] == 4 assert checkdata["count-good-share-hosts"] == 4
assert checkdata["count-shares-good"] == 4 assert checkdata["count-shares-good"] == 4

View File

@ -1,14 +1,19 @@
""" """
Ported to Python 3. General functionality useful for the implementation of integration tests.
""" """
from __future__ import annotations
from contextlib import contextmanager
from typing import Any
from typing_extensions import Literal
from tempfile import NamedTemporaryFile
import sys import sys
import time import time
import json import json
from os import mkdir, environ from os import mkdir, environ
from os.path import exists, join from os.path import exists, join
from io import StringIO, BytesIO from io import StringIO, BytesIO
from functools import partial
from subprocess import check_output from subprocess import check_output
from twisted.python.filepath import ( from twisted.python.filepath import (
@ -18,12 +23,23 @@ from twisted.internet.defer import Deferred, succeed
from twisted.internet.protocol import ProcessProtocol from twisted.internet.protocol import ProcessProtocol
from twisted.internet.error import ProcessExitedAlready, ProcessDone from twisted.internet.error import ProcessExitedAlready, ProcessDone
from twisted.internet.threads import deferToThread from twisted.internet.threads import deferToThread
from twisted.internet.interfaces import IProcessTransport, IReactorProcess
from attrs import frozen, evolve
import requests import requests
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.serialization import (
Encoding,
PrivateFormat,
NoEncryption,
)
from paramiko.rsakey import RSAKey from paramiko.rsakey import RSAKey
from boltons.funcutils import wraps from boltons.funcutils import wraps
from allmydata.util import base32
from allmydata.util.configutil import ( from allmydata.util.configutil import (
get_config, get_config,
set_config, set_config,
@ -134,9 +150,40 @@ class _MagicTextProtocol(ProcessProtocol):
sys.stdout.write(data) sys.stdout.write(data)
def _cleanup_tahoe_process(tahoe_transport, exited): def _cleanup_process_async(transport: IProcessTransport, allow_missing: bool) -> None:
""" """
Terminate the given process with a kill signal (SIGKILL on POSIX, If the given process transport seems to still be associated with a
running process, send a SIGTERM to that process.
:param transport: The transport to use.
:param allow_missing: If ``True`` then it is not an error for the
transport to have no associated process. Otherwise, an exception will
be raised in that case.
:raise: ``ValueError`` if ``allow_missing`` is ``False`` and the transport
has no process.
"""
if transport.pid is None:
if allow_missing:
print("Process already cleaned up and that's okay.")
return
else:
raise ValueError("Process is not running")
print("signaling {} with TERM".format(transport.pid))
try:
transport.signalProcess('TERM')
except ProcessExitedAlready:
# The transport object thought it still had a process but the real OS
# process has already exited. That's fine. We accomplished what we
# wanted to. We don't care about ``allow_missing`` here because
# there's no way we could have known the real OS process already
# exited.
pass
def _cleanup_tahoe_process(tahoe_transport, exited, allow_missing=False):
"""
Terminate the given process with a kill signal (SIGTERM on POSIX,
TerminateProcess on Windows). TerminateProcess on Windows).
:param tahoe_transport: The `IProcessTransport` representing the process. :param tahoe_transport: The `IProcessTransport` representing the process.
@ -145,14 +192,10 @@ def _cleanup_tahoe_process(tahoe_transport, exited):
:return: After the process has exited. :return: After the process has exited.
""" """
from twisted.internet import reactor from twisted.internet import reactor
try: _cleanup_process_async(tahoe_transport, allow_missing=allow_missing)
print("signaling {} with TERM".format(tahoe_transport.pid))
tahoe_transport.signalProcess('TERM')
print("signaled, blocking on exit") print("signaled, blocking on exit")
block_with_timeout(exited, reactor) block_with_timeout(exited, reactor)
print("exited, goodbye") print("exited, goodbye")
except ProcessExitedAlready:
pass
def _tahoe_runner_optional_coverage(proto, reactor, request, other_args): def _tahoe_runner_optional_coverage(proto, reactor, request, other_args):
@ -199,8 +242,33 @@ class TahoeProcess(object):
def kill(self): def kill(self):
"""Kill the process, block until it's done.""" """Kill the process, block until it's done."""
print(f"TahoeProcess.kill({self.transport.pid} / {self.node_dir})")
_cleanup_tahoe_process(self.transport, self.transport.exited) _cleanup_tahoe_process(self.transport, self.transport.exited)
def kill_async(self):
"""
Kill the process, return a Deferred that fires when it's done.
"""
print(f"TahoeProcess.kill_async({self.transport.pid} / {self.node_dir})")
_cleanup_process_async(self.transport, allow_missing=False)
return self.transport.exited
def restart_async(self, reactor: IReactorProcess, request: Any) -> Deferred:
"""
Stop and then re-start the associated process.
:return: A Deferred that fires after the new process is ready to
handle requests.
"""
d = self.kill_async()
d.addCallback(lambda ignored: _run_node(reactor, self.node_dir, request, None, finalize=False))
def got_new_process(proc):
# Grab the new transport since the one we had before is no longer
# valid after the stop/start cycle.
self._process_transport = proc.transport
d.addCallback(got_new_process)
return d
def __str__(self): def __str__(self):
return "<TahoeProcess in '{}'>".format(self._node_dir) return "<TahoeProcess in '{}'>".format(self._node_dir)
@ -229,19 +297,17 @@ def _run_node(reactor, node_dir, request, magic_text, finalize=True):
) )
transport.exited = protocol.exited transport.exited = protocol.exited
if finalize: tahoe_process = TahoeProcess(
request.addfinalizer(partial(_cleanup_tahoe_process, transport, protocol.exited))
# XXX abusing the Deferred; should use .when_magic_seen() pattern
def got_proto(proto):
transport._protocol = proto
return TahoeProcess(
transport, transport,
node_dir, node_dir,
) )
protocol.magic_seen.addCallback(got_proto)
return protocol.magic_seen if finalize:
request.addfinalizer(tahoe_process.kill)
d = protocol.magic_seen
d.addCallback(lambda ignored: tahoe_process)
return d
def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, name, web_port, def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, name, web_port,
@ -572,3 +638,158 @@ def run_in_thread(f):
def test(*args, **kwargs): def test(*args, **kwargs):
return deferToThread(lambda: f(*args, **kwargs)) return deferToThread(lambda: f(*args, **kwargs))
return test return test
@frozen
class CHK:
"""
Represent the CHK encoding sufficiently to run a ``tahoe put`` command
using it.
"""
kind = "chk"
max_shares = 256
def customize(self) -> CHK:
# Nothing to do.
return self
@classmethod
def load(cls, params: None) -> CHK:
assert params is None
return cls()
def to_json(self) -> None:
return None
@contextmanager
def to_argv(self) -> None:
yield []
@frozen
class SSK:
"""
Represent the SSK encodings (SDMF and MDMF) sufficiently to run a
``tahoe put`` command using one of them.
"""
kind = "ssk"
# SDMF and MDMF encode share counts (N and k) into the share itself as an
# unsigned byte. They could have encoded (share count - 1) to fit the
# full range supported by ZFEC into the unsigned byte - but they don't.
# So 256 is inaccessible to those formats and we set the upper bound at
# 255.
max_shares = 255
name: Literal["sdmf", "mdmf"]
key: None | bytes
@classmethod
def load(cls, params: dict) -> SSK:
assert params.keys() == {"format", "mutable", "key"}
return cls(params["format"], params["key"].encode("ascii"))
def customize(self) -> SSK:
"""
Return an SSK with a newly generated random RSA key.
"""
return evolve(self, key=generate_rsa_key())
def to_json(self) -> dict[str, str]:
return {
"format": self.name,
"mutable": None,
"key": self.key.decode("ascii"),
}
@contextmanager
def to_argv(self) -> None:
with NamedTemporaryFile() as f:
f.write(self.key)
f.flush()
yield [f"--format={self.name}", "--mutable", f"--private-key-path={f.name}"]
def upload(alice: TahoeProcess, fmt: CHK | SSK, data: bytes) -> str:
"""
Upload the given data to the given node.
:param alice: The node to upload to.
:param fmt: The name of the format for the upload. CHK, SDMF, or MDMF.
:param data: The data to upload.
:return: The capability for the uploaded data.
"""
with NamedTemporaryFile() as f:
f.write(data)
f.flush()
with fmt.to_argv() as fmt_argv:
argv = [alice, "put"] + fmt_argv + [f.name]
return cli(*argv).decode("utf-8").strip()
async def reconfigure(reactor, request, node: TahoeProcess, params: tuple[int, int, int], convergence: None | bytes) -> None:
"""
Reconfigure a Tahoe-LAFS node with different ZFEC parameters and
convergence secret.
If the current configuration is different from the specified
configuration, the node will be restarted so it takes effect.
:param reactor: A reactor to use to restart the process.
:param request: The pytest request object to use to arrange process
cleanup.
:param node: The Tahoe-LAFS node to reconfigure.
:param params: The ``happy``, ``needed``, and ``total`` ZFEC encoding
parameters.
:param convergence: If given, the convergence secret. If not given, the
existing convergence secret will be left alone.
:return: ``None`` after the node configuration has been rewritten, the
node has been restarted, and the node is ready to provide service.
"""
happy, needed, total = params
config = node.get_config()
changed = False
cur_happy = int(config.get_config("client", "shares.happy"))
cur_needed = int(config.get_config("client", "shares.needed"))
cur_total = int(config.get_config("client", "shares.total"))
if (happy, needed, total) != (cur_happy, cur_needed, cur_total):
changed = True
config.set_config("client", "shares.happy", str(happy))
config.set_config("client", "shares.needed", str(needed))
config.set_config("client", "shares.total", str(total))
if convergence is not None:
cur_convergence = config.get_private_config("convergence").encode("ascii")
if base32.a2b(cur_convergence) != convergence:
changed = True
config.write_private_config("convergence", base32.b2a(convergence))
if changed:
# restart the node
print(f"Restarting {node.node_dir} for ZFEC reconfiguration")
await node.restart_async(reactor, request)
print("Restarted. Waiting for ready state.")
await_client_ready(node)
print("Ready.")
else:
print("Config unchanged, not restarting.")
def generate_rsa_key() -> bytes:
"""
Generate a 2048 bit RSA key suitable for use with SSKs.
"""
return rsa.generate_private_key(
public_exponent=65537,
key_size=2048,
backend=default_backend()
).private_bytes(
encoding=Encoding.PEM,
format=PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=NoEncryption(),
)

View File

@ -0,0 +1,30 @@
__all__ = [
"DATA_PATH",
"CURRENT_VERSION",
"MAX_SHARES",
"Case",
"Sample",
"SeedParam",
"encode_bytes",
"save_capabilities",
"capabilities",
]
from .vectors import (
DATA_PATH,
CURRENT_VERSION,
Case,
Sample,
SeedParam,
encode_bytes,
save_capabilities,
capabilities,
)
from .parameters import (
MAX_SHARES,
)

View File

@ -0,0 +1,58 @@
"""
Simple data type definitions useful in the definition/verification of test
vectors.
"""
from __future__ import annotations
from attrs import frozen
# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"
@frozen
class Sample:
"""
Some instructions for building a long byte string.
:ivar seed: Some bytes to repeat some times to produce the string.
:ivar length: The length of the desired byte string.
"""
seed: bytes
length: int
@frozen
class Param:
"""
Some ZFEC parameters.
"""
required: int
total: int
@frozen
class SeedParam:
"""
Some ZFEC parameters, almost.
:ivar required: The number of required shares.
:ivar total: Either the number of total shares or the constant
``MAX_SHARES`` to indicate that the total number of shares should be
the maximum number supported by the object format.
"""
required: int
total: int | str
def realize(self, max_total: int) -> Param:
"""
Create a ``Param`` from this object's values, possibly
substituting the given real value for total if necessary.
:param max_total: The value to use to replace ``MAX_SHARES`` if
necessary.
"""
if self.total == MAX_SHARES:
return Param(self.required, max_total)
return Param(self.required, self.total)

View File

@ -0,0 +1,93 @@
"""
Define input parameters for test vector generation.
:ivar CONVERGENCE_SECRETS: Convergence secrets.
:ivar SEGMENT_SIZE: The single segment size that the Python implementation
currently supports without a lot of refactoring.
:ivar OBJECT_DESCRIPTIONS: Small objects with instructions which can be
expanded into a possibly large byte string. These are intended to be used
as plaintext inputs.
:ivar ZFEC_PARAMS: Input parameters to ZFEC.
:ivar FORMATS: Encoding/encryption formats.
"""
from __future__ import annotations
from hashlib import sha256
from .model import MAX_SHARES
from .vectors import Sample, SeedParam
from ..util import CHK, SSK
def digest(bs: bytes) -> bytes:
"""
Digest bytes to bytes.
"""
return sha256(bs).digest()
def hexdigest(bs: bytes) -> str:
"""
Digest bytes to text.
"""
return sha256(bs).hexdigest()
# Just a couple convergence secrets. The only thing we do with this value is
# feed it into a tagged hash. It certainly makes a difference to the output
# but the hash should destroy any structure in the input so it doesn't seem
# like there's a reason to test a lot of different values.
CONVERGENCE_SECRETS: list[bytes] = [
b"aaaaaaaaaaaaaaaa",
digest(b"Hello world")[:16],
]
SEGMENT_SIZE: int = 128 * 1024
# Exercise at least a handful of different sizes, trying to cover:
#
# 1. Some cases smaller than one "segment" (128k).
# This covers shrinking of some parameters to match data size.
# This includes one case of the smallest possible CHK.
#
# 2. Some cases right on the edges of integer segment multiples.
# Because boundaries are tricky.
#
# 4. Some cases that involve quite a few segments.
# This exercises merkle tree construction more thoroughly.
#
# See ``stretch`` for construction of the actual test data.
OBJECT_DESCRIPTIONS: list[Sample] = [
# The smallest possible. 55 bytes and smaller are LIT.
Sample(b"a", 56),
Sample(b"a", 1024),
Sample(b"c", 4096),
Sample(digest(b"foo"), SEGMENT_SIZE - 1),
Sample(digest(b"bar"), SEGMENT_SIZE + 1),
Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
Sample(digest(b"bazquux"), SEGMENT_SIZE * 32),
Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
]
ZFEC_PARAMS: list[SeedParam] = [
SeedParam(1, 1),
SeedParam(1, 3),
SeedParam(2, 3),
SeedParam(3, 10),
SeedParam(71, 255),
SeedParam(101, MAX_SHARES),
]
FORMATS: list[CHK | SSK] = [
CHK(),
# These start out unaware of a key but various keys will be supplied
# during generation.
SSK(name="sdmf", key=None),
SSK(name="mdmf", key=None),
]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,155 @@
"""
A module that loads pre-generated test vectors.
:ivar DATA_PATH: The path of the file containing test vectors.
:ivar capabilities: The capability test vectors.
"""
from __future__ import annotations
from typing import TextIO
from attrs import frozen
from yaml import safe_load, safe_dump
from base64 import b64encode, b64decode
from twisted.python.filepath import FilePath
from .model import Param, Sample, SeedParam
from ..util import CHK, SSK
DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")
# The version of the persisted test vector data this code can interpret.
CURRENT_VERSION: str = "2023-01-16.2"
@frozen
class Case:
"""
Represent one case for which we want/have a test vector.
"""
seed_params: Param
convergence: bytes
seed_data: Sample
fmt: CHK | SSK
segment_size: int
@property
def data(self):
return stretch(self.seed_data.seed, self.seed_data.length)
@property
def params(self):
return self.seed_params.realize(self.fmt.max_shares)
def encode_bytes(b: bytes) -> str:
"""
Base64 encode some bytes to text so they are representable in JSON.
"""
return b64encode(b).decode("ascii")
def decode_bytes(b: str) -> bytes:
"""
Base64 decode some text to bytes.
"""
return b64decode(b.encode("ascii"))
def stretch(seed: bytes, size: int) -> bytes:
"""
Given a simple description of a byte string, return the byte string
itself.
"""
assert isinstance(seed, bytes)
assert isinstance(size, int)
assert size > 0
assert len(seed) > 0
multiples = size // len(seed) + 1
return (seed * multiples)[:size]
def save_capabilities(results: list[tuple[Case, str]], path: FilePath = DATA_PATH) -> None:
"""
Save some test vector cases and their expected values.
This is logically the inverse of ``load_capabilities``.
"""
path.setContent(safe_dump({
"version": CURRENT_VERSION,
"vector": [
{
"convergence": encode_bytes(case.convergence),
"format": {
"kind": case.fmt.kind,
"params": case.fmt.to_json(),
},
"sample": {
"seed": encode_bytes(case.seed_data.seed),
"length": case.seed_data.length,
},
"zfec": {
"segmentSize": case.segment_size,
"required": case.params.required,
"total": case.params.total,
},
"expected": cap,
}
for (case, cap)
in results
],
}).encode("ascii"))
def load_format(serialized: dict) -> CHK | SSK:
"""
Load an encrypted object format from a simple description of it.
:param serialized: A ``dict`` describing either CHK or SSK, possibly with
some parameters.
"""
if serialized["kind"] == "chk":
return CHK.load(serialized["params"])
elif serialized["kind"] == "ssk":
return SSK.load(serialized["params"])
else:
raise ValueError(f"Unrecognized format: {serialized}")
def load_capabilities(f: TextIO) -> dict[Case, str]:
"""
Load some test vector cases and their expected results from the given
file.
This is logically the inverse of ``save_capabilities``.
"""
data = safe_load(f)
if data is None:
return {}
if data["version"] != CURRENT_VERSION:
print(
f"Current version is {CURRENT_VERSION}; "
f"cannot load version {data['version']} data."
)
return {}
return {
Case(
seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
segment_size=case["zfec"]["segmentSize"],
convergence=decode_bytes(case["convergence"]),
seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
fmt=load_format(case["format"]),
): case["expected"]
for case
in data["vector"]
}
try:
with DATA_PATH.open() as f:
capabilities: dict[Case, str] = load_capabilities(f)
except FileNotFoundError:
capabilities = {}

1
newsfragments/3961.other Normal file
View File

@ -0,0 +1 @@
The integration test suite now includes a set of capability test vectors (``integration/vectors/test_vectors.yaml``) which can be used to verify compatibility between Tahoe-LAFS and other implementations.

0
newsfragments/3967.minor Normal file
View File

View File

@ -1,14 +1,14 @@
{ {
"mach-nix": { "mach-nix": {
"branch": "master", "branch": "switch-to-nix-pypi-fetcher-2",
"description": "Create highly reproducible python environments", "description": "Create highly reproducible python environments",
"homepage": "", "homepage": "",
"owner": "davhau", "owner": "PrivateStorageio",
"repo": "mach-nix", "repo": "mach-nix",
"rev": "bdc97ba6b2ecd045a467b008cff4ae337b6a7a6b", "rev": "f6d1a1841d8778c199326f95d0703c16bee2f8c4",
"sha256": "12b3jc0g0ak6s93g3ifvdpwxbyqx276k1kl66bpwz8a67qjbcbwf", "sha256": "0krc4yhnpbzc4yhja9frnmym2vqm5zyacjnqb3fq9z9gav8vs9ls",
"type": "tarball", "type": "tarball",
"url": "https://github.com/davhau/mach-nix/archive/bdc97ba6b2ecd045a467b008cff4ae337b6a7a6b.tar.gz", "url": "https://github.com/PrivateStorageio/mach-nix/archive/f6d1a1841d8778c199326f95d0703c16bee2f8c4.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
}, },
"niv": { "niv": {
@ -53,10 +53,10 @@
"homepage": "", "homepage": "",
"owner": "DavHau", "owner": "DavHau",
"repo": "pypi-deps-db", "repo": "pypi-deps-db",
"rev": "5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6", "rev": "5440c9c76f6431f300fb6a1ecae762a5444de5f6",
"sha256": "0pc6mj7rzvmhh303rvj5wf4hrksm4h2rf4fsvqs0ljjdmgxrqm3f", "sha256": "08r3iiaxzw9v2gq15y1m9bwajshyyz9280g6aia7mkgnjs9hnd1n",
"type": "tarball", "type": "tarball",
"url": "https://github.com/DavHau/pypi-deps-db/archive/5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6.tar.gz", "url": "https://github.com/DavHau/pypi-deps-db/archive/5440c9c76f6431f300fb6a1ecae762a5444de5f6.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
} }
} }

3
pytest.ini Normal file
View File

@ -0,0 +1,3 @@
[pytest]
markers =
slow: marks tests as slow (not run by default; run them with '--runslow')

View File

@ -139,11 +139,10 @@ install_requires = [
"werkzeug != 2.2.0", "werkzeug != 2.2.0",
"treq", "treq",
"cbor2", "cbor2",
# Ideally we want 0.4+ to be able to pass in mmap(), but it's not strictly
# necessary yet until we fix the workaround to # 0.4 adds the ability to pass in mmap() values which greatly reduces the
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3963 in # amount of copying involved.
# allmydata.storage.http_server. "pycddl >= 0.4",
"pycddl",
# for pid-file support # for pid-file support
"psutil", "psutil",

View File

@ -11,7 +11,6 @@ import binascii
from tempfile import TemporaryFile from tempfile import TemporaryFile
from os import SEEK_END, SEEK_SET from os import SEEK_END, SEEK_SET
import mmap import mmap
from importlib.metadata import version as get_package_version, PackageNotFoundError
from cryptography.x509 import Certificate as CryptoCertificate from cryptography.x509 import Certificate as CryptoCertificate
from zope.interface import implementer from zope.interface import implementer
@ -60,20 +59,6 @@ from ..util.base32 import rfc3548_alphabet
from allmydata.interfaces import BadWriteEnablerError from allmydata.interfaces import BadWriteEnablerError
# Until we figure out Nix (https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3963),
# need to support old pycddl which can only take bytes:
from distutils.version import LooseVersion
try:
PYCDDL_BYTES_ONLY = LooseVersion(get_package_version("pycddl")) < LooseVersion(
"0.4"
)
except PackageNotFoundError:
# This can happen when building PyInstaller distribution. We'll just assume
# you installed a modern pycddl, cause why wouldn't you?
PYCDDL_BYTES_ONLY = False
class ClientSecretsException(Exception): class ClientSecretsException(Exception):
"""The client did not send the appropriate secrets.""" """The client did not send the appropriate secrets."""
@ -572,7 +557,7 @@ class HTTPServer(object):
fd = request.content.fileno() fd = request.content.fileno()
except (ValueError, OSError): except (ValueError, OSError):
fd = -1 fd = -1
if fd >= 0 and not PYCDDL_BYTES_ONLY: if fd >= 0:
# It's a file, so we can use mmap() to save memory. # It's a file, so we can use mmap() to save memory.
message = mmap.mmap(fd, 0, access=mmap.ACCESS_READ) message = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
else: else: