reproducible ssk vectors

Jean-Paul Calderone 2023-01-16 15:53:24 -05:00
parent 1827834434
commit 4eec8113ee
5 changed files with 16362 additions and 1119 deletions


@@ -9,11 +9,13 @@ from hashlib import sha256
 from itertools import starmap, product
 from yaml import safe_dump
+from attrs import evolve
 from pytest import mark
 from pytest_twisted import ensureDeferred
+from twisted.python.filepath import FilePath
 from . import vectors
-from .util import reconfigure, upload, TahoeProcess
+from .util import CHK, SSK, reconfigure, upload, TahoeProcess

 def digest(bs: bytes) -> bytes:
     """
@@ -75,9 +77,11 @@ ZFEC_PARAMS = [
 ]
 FORMATS = [
-    "chk",
-    # "sdmf",
-    # "mdmf",
+    CHK(),
+    # These start out unaware of a key but various keys will be supplied
+    # during generation.
+    SSK(name="sdmf", key=None),
+    SSK(name="mdmf", key=None),
 ]
 @mark.parametrize('convergence', CONVERGENCE_SECRETS)
@@ -89,18 +93,15 @@ def test_convergence(convergence):
     assert len(convergence) == 16, "Convergence secret must be 16 bytes"

-@mark.parametrize('seed_params', ZFEC_PARAMS)
-@mark.parametrize('convergence', CONVERGENCE_SECRETS)
-@mark.parametrize('seed_data', OBJECT_DESCRIPTIONS)
-@mark.parametrize('fmt', FORMATS)
+@mark.parametrize('case_and_expected', vectors.capabilities.items())
 @ensureDeferred
-async def test_capability(reactor, request, alice, seed_params, convergence, seed_data, fmt):
+async def test_capability(reactor, request, alice, case_and_expected):
     """
     The capability that results from uploading certain well-known data
     with certain well-known parameters is exactly the previously
     computed value.
     """
-    case = vectors.Case(seed_params, convergence, seed_data, fmt)
+    case, expected = case_and_expected
     # rewrite alice's config to match params and convergence
     await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)
@@ -109,7 +110,6 @@ async def test_capability(reactor, request, alice, seed_params, convergence, seed_data, fmt):
     actual = upload(alice, case.fmt, case.data)

     # compare the resulting cap to the expected result
-    expected = vectors.capabilities[case]
     assert actual == expected
@@ -130,13 +130,27 @@ async def test_generate(reactor, request, alice):
         OBJECT_DESCRIPTIONS,
         FORMATS,
     ))
-    results = generate(reactor, request, alice, space)
-    vectors.DATA_PATH.setContent(safe_dump({
-        "version": "2023-01-12",
+    iterresults = generate(reactor, request, alice, space)
+
+    # Update the output file with results as they become available.
+    results = []
+    async for result in iterresults:
+        results.append(result)
+        write_results(vectors.DATA_PATH, results)
+
+
+def write_results(path: FilePath, results: list[tuple[vectors.Case, str]]) -> None:
+    """
+    Save the given results.
+    """
+    path.setContent(safe_dump({
+        "version": vectors.CURRENT_VERSION,
         "vector": [
             {
                 "convergence": vectors.encode_bytes(case.convergence),
-                "format": case.fmt,
+                "format": {
+                    "kind": case.fmt.kind,
+                    "params": case.fmt.to_json(),
+                },
                 "sample": {
                     "seed": vectors.encode_bytes(case.seed_data.seed),
                     "length": case.seed_data.length,
@@ -148,12 +162,11 @@ async def test_generate(reactor, request, alice):
                 },
                 "expected": cap,
             }
-            async for (case, cap)
+            for (case, cap)
             in results
         ],
     }).encode("ascii"))

 async def generate(
     reactor,
     request,
@@ -189,5 +202,7 @@ async def generate(
         case.convergence
     )
+    # Give the format a chance to make an RSA key if it needs it.
+    case = evolve(case, fmt=case.fmt.customize())
     cap = upload(alice, case.fmt, case.data)
     yield case, cap
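For orientation, each entry in the "vector" list written by write_results pairs a fully described case with its expected capability string. A rough sketch of one such entry follows; every value is invented for illustration (the real "key" holds a PEM-encoded RSA private key, and the byte fields are base64-encoded by encode_bytes):

    # Hypothetical shape of one serialized test vector entry.
    {
        "convergence": "AAAAAAAAAAAAAAAAAAAAAA==",  # base64 of the 16-byte secret
        "format": {
            "kind": "ssk",
            "params": {"format": "sdmf", "mutable": None, "key": "-----BEGIN RSA PRIVATE KEY-----..."},
        },
        "sample": {"seed": "YQ==", "length": 1024},  # stretch() expands seed to length
        "zfec": {"required": 1, "total": 1},
        "expected": "URI:SSK:...",  # capability printed by `tahoe put`
    }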

(File diff suppressed because it is too large.)


@@ -2,7 +2,11 @@
 General functionality useful for the implementation of integration tests.
 """

+from __future__ import annotations
+
+from contextlib import contextmanager
 from typing import TypeVar, Iterator, Awaitable, Callable
+from typing_extensions import Literal
 from tempfile import NamedTemporaryFile
 import sys
 import time
@@ -21,8 +25,17 @@ from twisted.internet.protocol import ProcessProtocol
 from twisted.internet.error import ProcessExitedAlready, ProcessDone
 from twisted.internet.threads import deferToThread

+from attrs import frozen, evolve
+
 import requests

+from cryptography.hazmat.primitives.asymmetric import rsa
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives.serialization import (
+    Encoding,
+    PrivateFormat,
+    NoEncryption,
+)
+
 from paramiko.rsakey import RSAKey
 from boltons.funcutils import wraps
@@ -225,7 +238,7 @@ class TahoeProcess(object):
     def restart_async(self, reactor, request):
         d = self.kill_async()
-        d.addCallback(lambda ignored: _run_node(reactor, self.node_dir, request, None))
+        d.addCallback(lambda ignored: _run_node(reactor, self.node_dir, request, None, finalize=False))
         def got_new_process(proc):
             self._process_transport = proc.transport
         d.addCallback(got_new_process)
@@ -603,8 +616,76 @@ def run_in_thread(f):
         return deferToThread(lambda: f(*args, **kwargs))
     return test

+@frozen
+class CHK:
+    """
+    Represent the CHK encoding sufficiently to run a ``tahoe put`` command
+    using it.
+    """
+    kind = "chk"
+    max_shares = 256
+
+    def customize(self) -> CHK:
+        # Nothing to do.
+        return self
+
+    @classmethod
+    def load(cls, params: None) -> CHK:
+        assert params is None
+        return cls()
+
+    def to_json(self) -> None:
+        return None
+
+    @contextmanager
+    def to_argv(self) -> Iterator[list[str]]:
+        yield []
+
+
+@frozen
+class SSK:
+    """
+    Represent the SSK encodings (SDMF and MDMF) sufficiently to run a
+    ``tahoe put`` command using one of them.
+    """
+    kind = "ssk"
+
+    # SDMF and MDMF encode share counts (N and k) into the share itself as an
+    # unsigned byte.  They could have encoded (share count - 1) to fit the
+    # full range supported by ZFEC into the unsigned byte - but they don't.
+    # So 256 is inaccessible to those formats and we set the upper bound at
+    # 255.
+    max_shares = 255
+
+    name: Literal["sdmf", "mdmf"]
+    key: None | bytes
+
+    @classmethod
+    def load(cls, params: dict) -> SSK:
+        assert params.keys() == {"format", "mutable", "key"}
+        return cls(params["format"], params["key"].encode("ascii"))
+
+    def customize(self) -> SSK:
+        """
+        Return an SSK with a newly generated random RSA key.
+        """
+        return evolve(self, key=generate_rsa_key())
+
+    def to_json(self) -> dict[str, None | str]:
+        return {
+            "format": self.name,
+            "mutable": None,
+            "key": self.key.decode("ascii"),
+        }
+
+    @contextmanager
+    def to_argv(self) -> Iterator[list[str]]:
+        with NamedTemporaryFile() as f:
+            f.write(self.key)
+            f.flush()
+            yield [f"--format={self.name}", "--mutable", f"--private-key-path={f.name}"]
+
-def upload(alice: TahoeProcess, fmt: str, data: bytes) -> str:
+def upload(alice: TahoeProcess, fmt: CHK | SSK, data: bytes) -> str:
     """
     Upload the given data to the given node.
@@ -616,11 +697,13 @@ def upload(alice: TahoeProcess, fmt: str, data: bytes) -> str:
     :return: The capability for the uploaded data.
     """
     with NamedTemporaryFile() as f:
         f.write(data)
         f.flush()
-        return cli(alice, "put", f"--format={fmt}", f.name).decode("utf-8").strip()
+        with fmt.to_argv() as fmt_argv:
+            argv = [alice, "put"] + fmt_argv + [f.name]
+            return cli(*argv).decode("utf-8").strip()

 α = TypeVar("α")
 β = TypeVar("β")
@@ -707,3 +790,18 @@ async def reconfigure(reactor, request, node: TahoeProcess, params: tuple[int, int, int], convergence: bytes) -> None:
         print("Ready.")
     else:
         print("Config unchanged, not restarting.")
+
+def generate_rsa_key() -> bytes:
+    """
+    Generate a 2048 bit RSA key suitable for use with SSKs.
+    """
+    return rsa.generate_private_key(
+        public_exponent=65537,
+        key_size=2048,
+        backend=default_backend()
+    ).private_bytes(
+        encoding=Encoding.PEM,
+        format=PrivateFormat.TraditionalOpenSSL,
+        encryption_algorithm=NoEncryption(),
+    )
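Since generate_rsa_key returns serialized PEM bytes rather than a key object, a caller needing the key itself would parse it back. A minimal sketch, assuming a recent cryptography release where the backend argument is optional:

    # Illustrative round trip: the PEM bytes parse back to a 2048-bit key.
    from cryptography.hazmat.primitives.serialization import load_pem_private_key

    pem = generate_rsa_key()
    private_key = load_pem_private_key(pem, password=None)
    assert private_key.key_size == 2048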


@@ -3,7 +3,7 @@ A module that loads pre-generated test vectors.
 :ivar DATA_PATH: The path of the file containing test vectors.

-:ivar capabilities: The CHK test vectors.
+:ivar capabilities: The capability test vectors.
 """

 from __future__ import annotations
@@ -11,12 +11,16 @@ from __future__ import annotations
 from typing import TextIO
 from attrs import frozen
 from yaml import safe_load
+from pathlib import Path
 from base64 import b64encode, b64decode

 from twisted.python.filepath import FilePath

-DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")
+from .util import CHK, SSK
+
+DATA_PATH: FilePath = FilePath(__file__).sibling("vectors").child("test_vectors.yaml")
+
+# The version of the persisted test vector data this code can interpret.
+CURRENT_VERSION: str = "2023-01-16.2"

 @frozen
 class Sample:
@@ -42,16 +46,6 @@ class Param:
     # dealing with.
     MAX_SHARES = "max"

-# SDMF and MDMF encode share counts (N and k) into the share itself as an
-# unsigned byte. They could have encoded (share count - 1) to fit the full
-# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
-# inaccessible to those formats and we set the upper bound at 255.
-MAX_SHARES_MAP = {
-    "chk": 256,
-    "sdmf": 255,
-    "mdmf": 255,
-}

 @frozen
 class SeedParam:
     """
@@ -86,7 +80,7 @@ class Case:
     seed_params: Param
     convergence: bytes
     seed_data: Sample
-    fmt: str
+    fmt: CHK | SSK

     @property
     def data(self):
@@ -94,7 +88,7 @@ class Case:
     @property
     def params(self):
-        return self.seed_params.realize(MAX_SHARES_MAP[self.fmt])
+        return self.seed_params.realize(self.fmt.max_shares)

 def encode_bytes(b: bytes) -> str:
@@ -125,16 +119,32 @@ def stretch(seed: bytes, size: int) -> bytes:
     return (seed * multiples)[:size]

+def load_format(serialized: dict) -> CHK | SSK:
+    if serialized["kind"] == "chk":
+        return CHK.load(serialized["params"])
+    elif serialized["kind"] == "ssk":
+        return SSK.load(serialized["params"])
+    else:
+        raise ValueError(f"Unrecognized format: {serialized}")
+
 def load_capabilities(f: TextIO) -> dict[Case, str]:
     data = safe_load(f)
     if data is None:
         return {}
+    if data["version"] != CURRENT_VERSION:
+        print(
+            f"Current version is {CURRENT_VERSION}; "
+            f"cannot load version {data['version']} data."
+        )
+        return {}
     return {
         Case(
             seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
             convergence=decode_bytes(case["convergence"]),
             seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
-            fmt=case["format"],
+            fmt=load_format(case["format"]),
         ): case["expected"]
         for case
         in data["vector"]

(File diff suppressed because it is too large.)