Generate and consume the new structure properly

This commit is contained in:
Jean-Paul Calderone 2023-01-03 19:22:38 -05:00
parent ca00adf2b4
commit fb70ba1867
3 changed files with 203 additions and 2068 deletions

View File

@@ -4,28 +4,30 @@ Verify certain results against test vectors with well-known results.
 from __future__ import annotations
-from time import sleep
 from typing import AsyncGenerator, Iterator
 from hashlib import sha256
-from itertools import product
+from itertools import starmap, product
 from yaml import safe_dump
-from attrs import frozen
 from pytest import mark
 from pytest_twisted import ensureDeferred
 from . import vectors
-from .util import reconfigure, upload, asyncfoldr, insert, TahoeProcess
+from .util import reconfigure, upload, TahoeProcess
 def digest(bs: bytes) -> bytes:
+    """
+    Digest bytes to bytes.
+    """
     return sha256(bs).digest()
 def hexdigest(bs: bytes) -> str:
+    """
+    Digest bytes to text.
+    """
     return sha256(bs).hexdigest()
 # Just a couple convergence secrets. The only thing we do with this value is
 # feed it into a tagged hash. It certainly makes a difference to the output
 # but the hash should destroy any structure in the input so it doesn't seem
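As a standalone illustration of the invariant that test_convergence (below) checks, independent of the test suite and using only the stdlib: SHA-256 digests are 32 bytes, so truncating one to its first 16 bytes yields a value of exactly the length expected for a convergence secret.

    from hashlib import sha256

    def digest(bs: bytes) -> bytes:
        # Same helper as above: hash bytes to a 32-byte digest.
        return sha256(bs).digest()

    # Truncate to 16 bytes, as the CONVERGENCE_SECRETS entries do.
    secret = digest(b"Hello world")[:16]
    assert isinstance(secret, bytes)
    assert len(secret) == 16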
@@ -35,7 +37,6 @@ CONVERGENCE_SECRETS = [
     digest(b"Hello world")[:16],
 ]
 # Exercise at least a handful of different sizes, trying to cover:
 #
 # 1. Some cases smaller than one "segment" (128k).
@@ -51,87 +52,66 @@ CONVERGENCE_SECRETS = [
 SEGMENT_SIZE = 128 * 1024
 OBJECT_DESCRIPTIONS = [
-    (b"a", 1024),
-    (b"c", 4096),
-    (digest(b"foo"), SEGMENT_SIZE - 1),
-    (digest(b"bar"), SEGMENT_SIZE + 1),
-    (digest(b"baz"), SEGMENT_SIZE * 16 - 1),
-    (digest(b"quux"), SEGMENT_SIZE * 16 + 1),
-    (digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
-    (digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
+    vectors.Sample(b"a", 1024),
+    vectors.Sample(b"c", 4096),
+    vectors.Sample(digest(b"foo"), SEGMENT_SIZE - 1),
+    vectors.Sample(digest(b"bar"), SEGMENT_SIZE + 1),
+    vectors.Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
+    vectors.Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
+    vectors.Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
+    vectors.Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
 ]
-# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
-# Represent max symbolically and resolve it when we know what format we're
-# dealing with.
-MAX_SHARES = "max"
-# SDMF and MDMF encode share counts (N and k) into the share itself as an
-# unsigned byte. They could have encoded (share count - 1) to fit the full
-# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
-# inaccessible to those formats and we set the upper bound at 255.
-MAX_SHARES_MAP = {
-    "chk": 256,
-    "sdmf": 255,
-    "mdmf": 255,
-}
 ZFEC_PARAMS = [
-    (1, 1),
-    (1, 3),
-    (2, 3),
-    (3, 10),
-    (71, 255),
-    (101, MAX_SHARES),
+    vectors.SeedParam(1, 1),
+    vectors.SeedParam(1, 3),
+    vectors.SeedParam(2, 3),
+    vectors.SeedParam(3, 10),
+    vectors.SeedParam(71, 255),
+    vectors.SeedParam(101, vectors.MAX_SHARES),
 ]
 FORMATS = [
     "chk",
-    "sdmf",
-    "mdmf",
+    # "sdmf",
+    # "mdmf",
 ]
-@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
-def test_convergence(convergence_idx):
+@mark.parametrize('convergence', CONVERGENCE_SECRETS)
+def test_convergence(convergence):
     """
     Convergence secrets are 16 bytes.
     """
-    convergence = CONVERGENCE_SECRETS[convergence_idx]
     assert isinstance(convergence, bytes), "Convergence secret must be bytes"
     assert len(convergence) == 16, "Convergence secret must by 16 bytes"
-@mark.parametrize('params_idx', range(len(ZFEC_PARAMS)))
-@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
-@mark.parametrize('data_idx', range(len(OBJECT_DESCRIPTIONS)))
-@mark.parametrize('fmt_idx', range(len(FORMATS)))
+@mark.parametrize('seed_params', ZFEC_PARAMS)
+@mark.parametrize('convergence', CONVERGENCE_SECRETS)
+@mark.parametrize('seed_data', OBJECT_DESCRIPTIONS)
+@mark.parametrize('fmt', FORMATS)
 @ensureDeferred
-async def test_capability(reactor, request, alice, params_idx, convergence_idx, data_idx, fmt_idx):
+async def test_capability(reactor, request, alice, seed_params, convergence, seed_data, fmt):
     """
     The capability that results from uploading certain well-known data
     with certain well-known parameters results in exactly the previously
     computed value.
     """
-    case = load_case(
-        params_idx,
-        convergence_idx,
-        data_idx,
-        fmt_idx,
-    )
+    case = vectors.Case(seed_params, convergence, seed_data, fmt)
     # rewrite alice's config to match params and convergence
-    await reconfigure(reactor, request, alice, (1,) + case.params, case.convergence)
+    await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)
     # upload data in the correct format
     actual = upload(alice, case.fmt, case.data)
     # compare the resulting cap to the expected result
-    expected = vectors.capabilities["vector"][case.key]
+    expected = vectors.capabilities[case]
     assert actual == expected
 @ensureDeferred
-async def skiptest_generate(reactor, request, alice):
+async def test_generate(reactor, request, alice):
     """
     This is a helper for generating the test vectors.
@@ -141,27 +121,34 @@ async def skiptest_generate(reactor, request, alice):
     to run against the results produced originally, not a possibly
     ever-changing set of outputs.
     """
-    space = product(
-        range(len(ZFEC_PARAMS)),
-        range(len(CONVERGENCE_SECRETS)),
-        range(len(OBJECT_DESCRIPTIONS)),
-        range(len(FORMATS)),
-    )
-    results = await asyncfoldr(
-        generate(reactor, request, alice, space),
-        insert,
-        {},
-    )
+    space = starmap(vectors.Case, product(
+        ZFEC_PARAMS,
+        CONVERGENCE_SECRETS,
+        OBJECT_DESCRIPTIONS,
+        FORMATS,
+    ))
+    results = generate(reactor, request, alice, space)
     with vectors.DATA_PATH.open("w") as f:
         f.write(safe_dump({
-            "version": "2022-12-26",
-            "params": {
-                "zfec": ZFEC_PARAMS,
-                "convergence": CONVERGENCE_SECRETS,
-                "objects": OBJECT_DESCRIPTIONS,
-                "formats": FORMATS,
-            },
-            "vector": results,
+            "version": "2023-01-03",
+            "vector": [
+                {
+                    "convergence": vectors.encode_bytes(case.convergence),
+                    "format": case.fmt,
+                    "sample": {
+                        "seed": vectors.encode_bytes(case.seed_data.seed),
+                        "length": case.seed_data.length,
+                    },
+                    "zfec": {
+                        "segmentSize": SEGMENT_SIZE,
+                        "required": case.seed_params.required,
+                        "total": case.seed_params.total,
+                    },
+                    "expected": cap,
+                }
+                async for (case, cap)
+                in results
+            ],
         }))
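The safe_dump() call above drains generate(), an async generator, with an async list comprehension, so each (case, cap) pair is awaited as the "vector" list is built. A minimal sketch of the same pattern, with hypothetical pairs() and collect() as stand-ins for generate() and the dump:

    from typing import AsyncGenerator

    async def pairs() -> AsyncGenerator[tuple[str, str], None]:
        # Stand-in for generate(): yield (case, capability) pairs one at a time.
        for n in range(3):
            yield (f"case-{n}", f"cap-{n}")

    async def collect() -> list[dict[str, str]]:
        # The async comprehension awaits each yielded pair while building the
        # list, just as the "vector" list above is built inside safe_dump().
        return [{"case": case, "expected": cap} async for (case, cap) in pairs()]

    # Run with e.g. asyncio.run(collect()); the integration tests instead run
    # under Twisted's reactor via pytest-twisted.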
@@ -169,8 +156,8 @@ async def generate(
     reactor,
     request,
     alice: TahoeProcess,
-    space: Iterator[int, int, int, int],
-) -> AsyncGenerator[tuple[str, str], None]:
+    cases: Iterator[vectors.Case],
+) -> AsyncGenerator[[vectors.Case, str], None]:
     """
     Generate all of the test vectors using the given node.
@@ -184,79 +171,21 @@ async def generate(
     :param alice: The Tahoe-LAFS node to use to generate the test vectors.
-    :param space: An iterator of coordinates in the test vector space for
-        which to generate values. The elements of each tuple give indexes into
-        ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, and FORMATS.
-    :return: The yield values are two-tuples describing a test vector. The
-        first element is a string describing a case and the second element is
-        the capability for that case.
+    :param case: The inputs for which to generate a value.
+    :return: The capability for the case.
     """
     # Share placement doesn't affect the resulting capability. For maximum
     # reliability of this generator, be happy if we can put shares anywhere
     happy = 1
-    node_key = (None, None)
-    for params_idx, secret_idx, data_idx, fmt_idx in space:
-        case = load_case(params_idx, secret_idx, data_idx, fmt_idx)
-        if node_key != (case.params, case.convergence):
-            await reconfigure(reactor, request, alice, (happy,) + case.params, case.convergence)
-            node_key = (case.params, case.convergence)
+    for case in cases:
+        await reconfigure(
+            reactor,
+            request,
+            alice,
+            (happy, case.params.required, case.params.total),
+            case.convergence
+        )
         cap = upload(alice, case.fmt, case.data)
-        yield case.key, cap
+        yield case, cap
-def key(params: int, secret: int, data: int, fmt: int) -> str:
-    """
-    Construct the key describing the case defined by the given parameters.
-    The parameters are indexes into the test data for a certain case.
-    :return: A distinct string for the given inputs.
-    """
-    return f"{params}-{secret}-{data}-{fmt}"
-def stretch(seed: bytes, size: int) -> bytes:
-    """
-    Given a simple description of a byte string, return the byte string
-    itself.
-    """
-    assert isinstance(seed, bytes)
-    assert isinstance(size, int)
-    assert size > 0
-    assert len(seed) > 0
-    multiples = size // len(seed) + 1
-    return (seed * multiples)[:size]
-def load_case(
-    params_idx: int,
-    convergence_idx: int,
-    data_idx: int,
-    fmt_idx: int
-) -> Case:
-    """
-    :return:
-    """
-    params = ZFEC_PARAMS[params_idx]
-    fmt = FORMATS[fmt_idx]
-    convergence = CONVERGENCE_SECRETS[convergence_idx]
-    data = stretch(*OBJECT_DESCRIPTIONS[data_idx])
-    if params[1] == MAX_SHARES:
-        params = (params[0], MAX_SHARES_MAP[fmt])
-    k = key(params_idx, convergence_idx, data_idx, fmt_idx)
-    return Case(k, fmt, params, convergence, data)
-@frozen
-class Case:
-    """
-    Represent one case for which we want/have a test vector.
-    """
-    key: str
-    fmt: str
-    params: tuple[int, int]
-    convergence: bytes
-    data: bytes
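The deleted key()/load_case() helpers existed only to translate parametrized indexes back into values. Because frozen attrs classes are hashable and compare by value, the objects themselves can be handed to mark.parametrize, which is what the new decorators above do. A trimmed-down sketch of that pattern (this SeedParam is a simplified stand-in, not the real class):

    from attrs import frozen
    from pytest import mark

    @frozen
    class SeedParam:
        required: int
        total: int

    PARAMS = [SeedParam(1, 1), SeedParam(2, 3), SeedParam(3, 10)]

    # Parametrizing over the objects (rather than range(len(PARAMS))) lets the
    # test body use the values directly, with no index lookups to keep in sync.
    @mark.parametrize("seed_params", PARAMS)
    def test_required_not_more_than_total(seed_params):
        assert seed_params.required <= seed_params.total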

File diff suppressed because it is too large

View File

@@ -1,18 +1,144 @@
 """
 A module that loads pre-generated test vectors.
-:ivar CHK_PATH: The path of the file containing CHK test vectors.
-:ivar chk: The CHK test vectors.
+:ivar DATA_PATH: The path of the file containing test vectors.
+:ivar capabilities: The CHK test vectors.
 """
+from __future__ import annotations
+from typing import TextIO
+from attrs import frozen
 from yaml import safe_load
 from pathlib import Path
+from base64 import b64encode, b64decode
 DATA_PATH: Path = Path(__file__).parent / "test_vectors.yaml"
+@frozen
+class Sample:
+    """
+    Some instructions for building a long byte string.
+    :ivar seed: Some bytes to repeat some times to produce the string.
+    :ivar length: The length of the desired byte string.
+    """
+    seed: bytes
+    length: int
+@frozen
+class Param:
+    """
+    Some ZFEC parameters.
+    """
+    required: int
+    total: int
+# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
+# Represent max symbolically and resolve it when we know what format we're
+# dealing with.
+MAX_SHARES = "max"
+# SDMF and MDMF encode share counts (N and k) into the share itself as an
+# unsigned byte. They could have encoded (share count - 1) to fit the full
+# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
+# inaccessible to those formats and we set the upper bound at 255.
+MAX_SHARES_MAP = {
+    "chk": 256,
+    "sdmf": 255,
+    "mdmf": 255,
+}
+@frozen
+class SeedParam:
+    """
+    Some ZFEC parameters, almost.
+    :ivar required: The number of required shares.
+    :ivar total: Either the number of total shares or the constant
+        ``MAX_SHARES`` to indicate that the total number of shares should be
+        the maximum number supported by the object format.
+    """
+    required: int
+    total: int | str
+    def realize(self, max_total: int) -> Param:
+        """
+        Create a ``Param`` from this object's values, possibly
+        substituting the given real value for total if necessary.
+        :param max_total: The value to use to replace ``MAX_SHARES`` if
+            necessary.
+        """
+        if self.total == MAX_SHARES:
+            return Param(self.required, max_total)
+        return Param(self.required, self.total)
+@frozen
+class Case:
+    """
+    Represent one case for which we want/have a test vector.
+    """
+    seed_params: Param
+    convergence: bytes
+    seed_data: Sample
+    fmt: str
+    @property
+    def data(self):
+        return stretch(self.seed_data.seed, self.seed_data.length)
+    @property
+    def params(self):
+        return self.seed_params.realize(MAX_SHARES_MAP[self.fmt])
+def encode_bytes(b: bytes) -> str:
+    """
+    Base64 encode some bytes to text so they are representable in JSON.
+    """
+    return b64encode(b).decode("ascii")
+def decode_bytes(b: str) -> bytes:
+    """
+    Base64 decode some text to bytes.
+    """
+    return b64decode(b.encode("ascii"))
+def stretch(seed: bytes, size: int) -> bytes:
+    """
+    Given a simple description of a byte string, return the byte string
+    itself.
+    """
+    assert isinstance(seed, bytes)
+    assert isinstance(size, int)
+    assert size > 0
+    assert len(seed) > 0
+    multiples = size // len(seed) + 1
+    return (seed * multiples)[:size]
+def load_capabilities(f: TextIO) -> dict[Case, str]:
+    data = safe_load(f)
+    return {
+        Case(
+            seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
+            convergence=decode_bytes(case["convergence"]),
+            seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
+            fmt=case["format"],
+        ): case["expected"]
+        for case
+        in data["vector"]
+    }
 try:
     with DATA_PATH.open() as f:
-        capabilities: dict[str, str] = safe_load(f)
+        capabilities: dict[Case, str] = load_capabilities(f)
 except FileNotFoundError:
     capabilities = {}
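A sketch of how these pieces compose, assuming the module is importable as integration.vectors (that import path is an assumption): stretch() expands a Sample into the actual test data, SeedParam.realize() resolves the symbolic MAX_SHARES per object format, and since Case is frozen (hashable, compared by value) a freshly constructed Case can key directly into capabilities.

    from integration import vectors  # assumed import path

    # A Sample describes data without storing it; stretch() materializes it.
    sample = vectors.Sample(seed=b"ab", length=5)
    assert vectors.stretch(sample.seed, sample.length) == b"ababa"

    # MAX_SHARES resolves per format: 256 for CHK, 255 for SDMF/MDMF.
    seed_params = vectors.SeedParam(101, vectors.MAX_SHARES)
    assert seed_params.realize(vectors.MAX_SHARES_MAP["chk"]) == vectors.Param(101, 256)

    # Bytes round-trip through the base64 text encoding used in the YAML file.
    assert vectors.decode_bytes(vectors.encode_bytes(b"\x01\x02")) == b"\x01\x02"

    # A value-equal Case keys into the loaded capabilities mapping.
    case = vectors.Case(seed_params, b"\x00" * 16, sample, "chk")
    expected = vectors.capabilities.get(case)  # None unless this case was generated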