Factor more infrastructure code out of the test module

Test vector saving implementation can go near loading implementation.  Also we
can separate out some simple types from the more complex logic.  Initially
this was to resolve a circular dependency but that ended up being resolved
mostly by treatming SEGMENT_SIZE more like a parameter than a global.  Still,
smaller modules are okay...
This commit is contained in:
Jean-Paul Calderone 2023-01-18 13:52:11 -05:00
parent 280a77b53d
commit 129c6ec11a
5 changed files with 125 additions and 92 deletions

View File

@ -4,17 +4,15 @@ Verify certain results against test vectors with well-known results.
from __future__ import annotations
from functools import partial
from typing import AsyncGenerator, Iterator
from itertools import starmap, product
from yaml import safe_dump
from attrs import evolve
from pytest import mark
from pytest_twisted import ensureDeferred
from twisted.python.filepath import FilePath
from . import vectors
from .vectors import parameters
from .util import reconfigure, upload, TahoeProcess
@ -58,48 +56,24 @@ async def skiptest_generate(reactor, request, alice):
to run against the results produced originally, not a possibly
ever-changing set of outputs.
"""
space = starmap(vectors.Case, product(
parameters.ZFEC_PARAMS,
parameters.CONVERGENCE_SECRETS,
parameters.OBJECT_DESCRIPTIONS,
parameters.FORMATS,
))
space = starmap(
# segmentSize could be a parameter someday but it's not easy to vary
# using the Python implementation so it isn't one for now.
partial(vectors.Case, segmentSize=parameters.SEGMENT_SIZE),
product(
parameters.ZFEC_PARAMS,
parameters.CONVERGENCE_SECRETS,
parameters.OBJECT_DESCRIPTIONS,
parameters.FORMATS,
),
)
iterresults = generate(reactor, request, alice, space)
# Update the output file with results as they become available.
results = []
async for result in iterresults:
results.append(result)
write_results(vectors.DATA_PATH, results)
def write_results(path: FilePath, results: list[tuple[vectors.Case, str]]) -> None:
"""
Save the given results.
"""
path.setContent(safe_dump({
"version": vectors.CURRENT_VERSION,
"vector": [
{
"convergence": vectors.encode_bytes(case.convergence),
"format": {
"kind": case.fmt.kind,
"params": case.fmt.to_json(),
},
"sample": {
"seed": vectors.encode_bytes(case.seed_data.seed),
"length": case.seed_data.length,
},
"zfec": {
"segmentSize": parameters.SEGMENT_SIZE,
"required": case.params.required,
"total": case.params.total,
},
"expected": cap,
}
for (case, cap)
in results
],
}).encode("ascii"))
vectors.save_capabilities(results)
async def generate(
reactor,

View File

@ -14,7 +14,6 @@ __all__ = [
from .vectors import (
DATA_PATH,
CURRENT_VERSION,
MAX_SHARES,
Case,
Sample,
@ -23,3 +22,7 @@ from .vectors import (
capabilities,
)
from .parameters import (
MAX_SHARES,
)

View File

@ -0,0 +1,58 @@
"""
Simple data type definitions useful in the definition/verification of test
vectors.
"""
from __future__ import annotations
from attrs import frozen
# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"
@frozen
class Sample:
"""
Some instructions for building a long byte string.
:ivar seed: Some bytes to repeat some times to produce the string.
:ivar length: The length of the desired byte string.
"""
seed: bytes
length: int
@frozen
class Param:
"""
Some ZFEC parameters.
"""
required: int
total: int
@frozen
class SeedParam:
"""
Some ZFEC parameters, almost.
:ivar required: The number of required shares.
:ivar total: Either the number of total shares or the constant
``MAX_SHARES`` to indicate that the total number of shares should be
the maximum number supported by the object format.
"""
required: int
total: int | str
def realize(self, max_total: int) -> Param:
"""
Create a ``Param`` from this object's values, possibly
substituting the given real value for total if necessary.
:param max_total: The value to use to replace ``MAX_SHARES`` if
necessary.
"""
if self.total == MAX_SHARES:
return Param(self.required, max_total)
return Param(self.required, self.total)

View File

@ -19,7 +19,8 @@ from __future__ import annotations
from hashlib import sha256
from . import MAX_SHARES, Sample, SeedParam
from .model import MAX_SHARES
from .vectors import Sample, SeedParam
from ..util import CHK, SSK
def digest(bs: bytes) -> bytes:

View File

@ -10,11 +10,12 @@ from __future__ import annotations
from typing import TextIO
from attrs import frozen
from yaml import safe_load
from yaml import safe_load, safe_dump
from base64 import b64encode, b64decode
from twisted.python.filepath import FilePath
from .model import Param, Sample, SeedParam
from ..util import CHK, SSK
DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")
@ -22,62 +23,13 @@ DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")
# The version of the persisted test vector data this code can interpret.
CURRENT_VERSION: str = "2023-01-16.2"
@frozen
class Sample:
"""
Some instructions for building a long byte string.
:ivar seed: Some bytes to repeat some times to produce the string.
:ivar length: The length of the desired byte string.
"""
seed: bytes
length: int
@frozen
class Param:
"""
Some ZFEC parameters.
"""
required: int
total: int
# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"
@frozen
class SeedParam:
"""
Some ZFEC parameters, almost.
:ivar required: The number of required shares.
:ivar total: Either the number of total shares or the constant
``MAX_SHARES`` to indicate that the total number of shares should be
the maximum number supported by the object format.
"""
required: int
total: int | str
def realize(self, max_total: int) -> Param:
"""
Create a ``Param`` from this object's values, possibly
substituting the given real value for total if necessary.
:param max_total: The value to use to replace ``MAX_SHARES`` if
necessary.
"""
if self.total == MAX_SHARES:
return Param(self.required, max_total)
return Param(self.required, self.total)
@frozen
class Case:
"""
Represent one case for which we want/have a test vector.
"""
seed_params: Param
segment_size: int
convergence: bytes
seed_data: Sample
fmt: CHK | SSK
@ -119,7 +71,45 @@ def stretch(seed: bytes, size: int) -> bytes:
return (seed * multiples)[:size]
def save_capabilities(results: list[tuple[Case, str]], path: FilePath = DATA_PATH) -> None:
"""
Save some test vector cases and their expected values.
This is logically the inverse of ``load_capabilities``.
"""
path.setContent(safe_dump({
"version": CURRENT_VERSION,
"vector": [
{
"convergence": encode_bytes(case.convergence),
"format": {
"kind": case.fmt.kind,
"params": case.fmt.to_json(),
},
"sample": {
"seed": encode_bytes(case.seed_data.seed),
"length": case.seed_data.length,
},
"zfec": {
"segmentSize": case.segment_size,
"required": case.params.required,
"total": case.params.total,
},
"expected": cap,
}
for (case, cap)
in results
],
}).encode("ascii"))
def load_format(serialized: dict) -> CHK | SSK:
"""
Load an encrypted object format from a simple description of it.
:param serialized: A ``dict`` describing either CHK or SSK, possibly with
some parameters.
"""
if serialized["kind"] == "chk":
return CHK.load(serialized["params"])
elif serialized["kind"] == "ssk":
@ -129,6 +119,12 @@ def load_format(serialized: dict) -> CHK | SSK:
def load_capabilities(f: TextIO) -> dict[Case, str]:
"""
Load some test vector cases and their expected results from the given
file.
This is logically the inverse of ``save_capabilities``.
"""
data = safe_load(f)
if data is None:
return {}
@ -142,6 +138,7 @@ def load_capabilities(f: TextIO) -> dict[Case, str]:
return {
Case(
seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
segment_size=case["zfec"]["segmentSize"],
convergence=decode_bytes(case["convergence"]),
seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
fmt=load_format(case["format"]),