Factor more infrastructure code out of the test module

Test vector saving implementation can go near loading implementation. Also we can separate out some simple types from the more complex logic. Initially this was to resolve a circular dependency but that ended up being resolved mostly by treating SEGMENT_SIZE more like a parameter than a global. Still, smaller modules are okay...
2024-12-19 04:57:54 +00:00 · 2023-01-18 13:52:11 -05:00 · 2023-01-18 13:52:11 -05:00 · 129c6ec11a
commit 129c6ec11a
parent 280a77b53d
5 changed files with 125 additions and 92 deletions
--- a/integration/test_vectors.py
+++ b/integration/test_vectors.py
@@ -4,17 +4,15 @@ Verify certain results against test vectors with well-known results.

 from __future__ import annotations

+from functools import partial
 from typing import AsyncGenerator, Iterator
 from itertools import starmap, product
-from yaml import safe_dump

 from attrs import evolve

 from pytest import mark
 from pytest_twisted import ensureDeferred

-from twisted.python.filepath import FilePath
-
 from . import vectors
 from .vectors import parameters
 from .util import reconfigure, upload, TahoeProcess
@@ -58,48 +56,24 @@ async def skiptest_generate(reactor, request, alice):
    to run against the results produced originally, not a possibly
    ever-changing set of outputs.
    """
-    space = starmap(vectors.Case, product(
-        parameters.ZFEC_PARAMS,
-        parameters.CONVERGENCE_SECRETS,
-        parameters.OBJECT_DESCRIPTIONS,
-        parameters.FORMATS,
-    ))
+    space = starmap(
+        # segmentSize could be a parameter someday but it's not easy to vary
+        # using the Python implementation so it isn't one for now.
+        partial(vectors.Case, segmentSize=parameters.SEGMENT_SIZE),
+        product(
+            parameters.ZFEC_PARAMS,
+            parameters.CONVERGENCE_SECRETS,
+            parameters.OBJECT_DESCRIPTIONS,
+            parameters.FORMATS,
+        ),
+    )
    iterresults = generate(reactor, request, alice, space)

    # Update the output file with results as they become available.
    results = []
    async for result in iterresults:
        results.append(result)
-        write_results(vectors.DATA_PATH, results)
-
-def write_results(path: FilePath, results: list[tuple[vectors.Case, str]]) -> None:
-    """
-    Save the given results.
-    """
-    path.setContent(safe_dump({
-        "version": vectors.CURRENT_VERSION,
-        "vector": [
-            {
-                "convergence": vectors.encode_bytes(case.convergence),
-                "format": {
-                    "kind": case.fmt.kind,
-                    "params": case.fmt.to_json(),
-                },
-                "sample": {
-                    "seed": vectors.encode_bytes(case.seed_data.seed),
-                    "length": case.seed_data.length,
-                },
-                "zfec": {
-                    "segmentSize": parameters.SEGMENT_SIZE,
-                    "required": case.params.required,
-                    "total": case.params.total,
-                },
-                "expected": cap,
-            }
-            for (case, cap)
-            in results
-        ],
-    }).encode("ascii"))
+        vectors.save_capabilities(results)

 async def generate(
        reactor,
--- a/integration/vectors/__init__.py
+++ b/integration/vectors/__init__.py
@@ -14,7 +14,6 @@ __all__ = [
 from .vectors import (
    DATA_PATH,
    CURRENT_VERSION,
-    MAX_SHARES,

    Case,
    Sample,
@@ -23,3 +22,7 @@ from .vectors import (

    capabilities,
 )
+
+from .parameters import (
+    MAX_SHARES,
+)
--- a/integration/vectors/model.py
+++ b/integration/vectors/model.py
@@ -0,0 +1,58 @@
+"""
+Simple data type definitions useful in the definition/verification of test
+vectors.
+"""
+
+from __future__ import annotations
+
+from attrs import frozen
+
+# CHK have a max of 256 shares.  SDMF / MDMF have a max of 255 shares!
+# Represent max symbolically and resolve it when we know what format we're
+# dealing with.
+MAX_SHARES = "max"
+
+@frozen
+class Sample:
+    """
+    Some instructions for building a long byte string.
+
+    :ivar seed: Some bytes to repeat some times to produce the string.
+    :ivar length: The length of the desired byte string.
+    """
+    seed: bytes
+    length: int
+
+@frozen
+class Param:
+    """
+    Some ZFEC parameters.
+    """
+    required: int
+    total: int
+
+@frozen
+class SeedParam:
+    """
+    Some ZFEC parameters, almost.
+
+    :ivar required: The number of required shares.
+
+    :ivar total: Either the number of total shares or the constant
+        ``MAX_SHARES`` to indicate that the total number of shares should be
+        the maximum number supported by the object format.
+    """
+    required: int
+    total: int | str
+
+    def realize(self, max_total: int) -> Param:
+        """
+        Create a ``Param`` from this object's values, possibly
+        substituting the given real value for total if necessary.
+
+        :param max_total: The value to use to replace ``MAX_SHARES`` if
+            necessary.
+        """
+        if self.total == MAX_SHARES:
+            return Param(self.required, max_total)
+        return Param(self.required, self.total)
--- a/integration/vectors/parameters.py
+++ b/integration/vectors/parameters.py
@@ -19,7 +19,8 @@ from __future__ import annotations

 from hashlib import sha256

-from . import MAX_SHARES, Sample, SeedParam
+from .model import MAX_SHARES
+from .vectors import Sample, SeedParam
 from ..util import CHK, SSK

 def digest(bs: bytes) -> bytes:
--- a/integration/vectors/vectors.py
+++ b/integration/vectors/vectors.py
@@ -10,11 +10,12 @@ from __future__ import annotations

 from typing import TextIO
 from attrs import frozen
-from yaml import safe_load
+from yaml import safe_load, safe_dump
 from base64 import b64encode, b64decode

 from twisted.python.filepath import FilePath

+from .model import Param, Sample, SeedParam
 from ..util import CHK, SSK

 DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")
@@ -22,62 +23,13 @@ DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")
 # The version of the persisted test vector data this code can interpret.
 CURRENT_VERSION: str = "2023-01-16.2"

-@frozen
-class Sample:
-    """
-    Some instructions for building a long byte string.
-
-    :ivar seed: Some bytes to repeat some times to produce the string.
-    :ivar length: The length of the desired byte string.
-    """
-    seed: bytes
-    length: int
-
-@frozen
-class Param:
-    """
-    Some ZFEC parameters.
-    """
-    required: int
-    total: int
-
-# CHK have a max of 256 shares.  SDMF / MDMF have a max of 255 shares!
-# Represent max symbolically and resolve it when we know what format we're
-# dealing with.
-MAX_SHARES = "max"
-
-@frozen
-class SeedParam:
-    """
-    Some ZFEC parameters, almost.
-
-    :ivar required: The number of required shares.
-
-    :ivar total: Either the number of total shares or the constant
-        ``MAX_SHARES`` to indicate that the total number of shares should be
-        the maximum number supported by the object format.
-    """
-    required: int
-    total: int | str
-
-    def realize(self, max_total: int) -> Param:
-        """
-        Create a ``Param`` from this object's values, possibly
-        substituting the given real value for total if necessary.
-
-        :param max_total: The value to use to replace ``MAX_SHARES`` if
-            necessary.
-        """
-        if self.total == MAX_SHARES:
-            return Param(self.required, max_total)
-        return Param(self.required, self.total)
-
@frozen
 class Case:
    """
    Represent one case for which we want/have a test vector.
    """
    seed_params: Param
+    segment_size: int
    convergence: bytes
    seed_data: Sample
    fmt: CHK | SSK
@@ -119,7 +71,45 @@ def stretch(seed: bytes, size: int) -> bytes:
    return (seed * multiples)[:size]


+def save_capabilities(results: list[tuple[Case, str]], path: FilePath = DATA_PATH) -> None:
+    """
+    Save some test vector cases and their expected values.
+
+    This is logically the inverse of ``load_capabilities``.
+    """
+    path.setContent(safe_dump({
+        "version": CURRENT_VERSION,
+        "vector": [
+            {
+                "convergence": encode_bytes(case.convergence),
+                "format": {
+                    "kind": case.fmt.kind,
+                    "params": case.fmt.to_json(),
+                },
+                "sample": {
+                    "seed": encode_bytes(case.seed_data.seed),
+                    "length": case.seed_data.length,
+                },
+                "zfec": {
+                    "segmentSize": case.segment_size,
+                    "required": case.params.required,
+                    "total": case.params.total,
+                },
+                "expected": cap,
+            }
+            for (case, cap)
+            in results
+        ],
+    }).encode("ascii"))
+
+
 def load_format(serialized: dict) -> CHK | SSK:
+    """
+    Load an encrypted object format from a simple description of it.
+
+    :param serialized: A ``dict`` describing either CHK or SSK, possibly with
+        some parameters.
+    """
    if serialized["kind"] == "chk":
        return CHK.load(serialized["params"])
    elif serialized["kind"] == "ssk":
@@ -129,6 +119,12 @@ def load_format(serialized: dict) -> CHK | SSK:


 def load_capabilities(f: TextIO) -> dict[Case, str]:
+    """
+    Load some test vector cases and their expected results from the given
+    file.
+
+    This is logically the inverse of ``save_capabilities``.
+    """
    data = safe_load(f)
    if data is None:
        return {}
@@ -142,6 +138,7 @@ def load_capabilities(f: TextIO) -> dict[Case, str]:
    return {
        Case(
            seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
+            segment_size=case["zfec"]["segmentSize"],
            convergence=decode_bytes(case["convergence"]),
            seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
            fmt=load_format(case["format"]),