Generate and consume the new structure properly

Jean-Paul Calderone 2023-01-03 19:22:38 -05:00
parent ca00adf2b4
commit fb70ba1867
3 changed files with 203 additions and 2068 deletions

View File

@@ -4,28 +4,30 @@ Verify certain results against test vectors with well-known results.
from __future__ import annotations
from time import sleep
from typing import AsyncGenerator, Iterator
from hashlib import sha256
from itertools import product
from itertools import starmap, product
from yaml import safe_dump
from attrs import frozen
from pytest import mark
from pytest_twisted import ensureDeferred
from . import vectors
from .util import reconfigure, upload, asyncfoldr, insert, TahoeProcess
from .util import reconfigure, upload, TahoeProcess
def digest(bs: bytes) -> bytes:
    """
    Digest bytes to bytes.
    """
    return sha256(bs).digest()


def hexdigest(bs: bytes) -> str:
    """
    Digest bytes to text.
    """
    return sha256(bs).hexdigest()
# Just a couple convergence secrets. The only thing we do with this value is
# feed it into a tagged hash. It certainly makes a difference to the output
# but the hash should destroy any structure in the input so it doesn't seem
@@ -35,7 +37,6 @@ CONVERGENCE_SECRETS = [
digest(b"Hello world")[:16],
]
# Exercise at least a handful of different sizes, trying to cover:
#
# 1. Some cases smaller than one "segment" (128k).
@@ -51,87 +52,66 @@ CONVERGENCE_SECRETS = [
SEGMENT_SIZE = 128 * 1024

OBJECT_DESCRIPTIONS = [
    (b"a", 1024),
    (b"c", 4096),
    (digest(b"foo"), SEGMENT_SIZE - 1),
    (digest(b"bar"), SEGMENT_SIZE + 1),
    (digest(b"baz"), SEGMENT_SIZE * 16 - 1),
    (digest(b"quux"), SEGMENT_SIZE * 16 + 1),
    (digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
    (digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
    vectors.Sample(b"a", 1024),
    vectors.Sample(b"c", 4096),
    vectors.Sample(digest(b"foo"), SEGMENT_SIZE - 1),
    vectors.Sample(digest(b"bar"), SEGMENT_SIZE + 1),
    vectors.Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
    vectors.Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
    vectors.Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
    vectors.Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
]
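
To make the size coverage concrete, here is a minimal illustrative sketch (the segments helper is hypothetical, not part of the suite) of how these lengths straddle the 128 KiB segment boundary:

SEGMENT_SIZE = 128 * 1024

def segments(size: int) -> int:
    # ceiling division: how many segments a size-byte object occupies
    return -(-size // SEGMENT_SIZE)

assert segments(SEGMENT_SIZE - 1) == 1    # just under one segment
assert segments(SEGMENT_SIZE + 1) == 2    # spills into a second segment
assert segments(SEGMENT_SIZE * 16 + 1) == 17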
# CHK has a max of 256 shares. SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"

# SDMF and MDMF encode share counts (N and k) into the share itself as an
# unsigned byte. They could have encoded (share count - 1) to fit the full
# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
# inaccessible to those formats and we set the upper bound at 255.
MAX_SHARES_MAP = {
    "chk": 256,
    "sdmf": 255,
    "mdmf": 255,
}
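
The one-byte limit described above is easy to check directly; a small standard-library sketch (illustrative only, not part of the suite):

import struct

struct.pack(">B", 255)        # 255 fits in the unsigned byte SDMF/MDMF use
try:
    struct.pack(">B", 256)    # 256 does not, hence the lower cap for those formats
except struct.error:
    pass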
ZFEC_PARAMS = [
    (1, 1),
    (1, 3),
    (2, 3),
    (3, 10),
    (71, 255),
    (101, MAX_SHARES),
    vectors.SeedParam(1, 1),
    vectors.SeedParam(1, 3),
    vectors.SeedParam(2, 3),
    vectors.SeedParam(3, 10),
    vectors.SeedParam(71, 255),
    vectors.SeedParam(101, vectors.MAX_SHARES),
]
FORMATS = [
    "chk",
    "sdmf",
    "mdmf",
    # "sdmf",
    # "mdmf",
]
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
def test_convergence(convergence_idx):
@mark.parametrize('convergence', CONVERGENCE_SECRETS)
def test_convergence(convergence):
    """
    Convergence secrets are 16 bytes.
    """
    convergence = CONVERGENCE_SECRETS[convergence_idx]
    assert isinstance(convergence, bytes), "Convergence secret must be bytes"
    assert len(convergence) == 16, "Convergence secret must be 16 bytes"
@mark.parametrize('params_idx', range(len(ZFEC_PARAMS)))
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
@mark.parametrize('data_idx', range(len(OBJECT_DESCRIPTIONS)))
@mark.parametrize('fmt_idx', range(len(FORMATS)))
@mark.parametrize('seed_params', ZFEC_PARAMS)
@mark.parametrize('convergence', CONVERGENCE_SECRETS)
@mark.parametrize('seed_data', OBJECT_DESCRIPTIONS)
@mark.parametrize('fmt', FORMATS)
@ensureDeferred
async def test_capability(reactor, request, alice, params_idx, convergence_idx, data_idx, fmt_idx):
async def test_capability(reactor, request, alice, seed_params, convergence, seed_data, fmt):
"""
The capability that results from uploading certain well-known data
with certain well-known parameters results in exactly the previously
computed value.
"""
case = load_case(
params_idx,
convergence_idx,
data_idx,
fmt_idx,
)
case = vectors.Case(seed_params, convergence, seed_data, fmt)
# rewrite alice's config to match params and convergence
await reconfigure(reactor, request, alice, (1,) + case.params, case.convergence)
await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)
# upload data in the correct format
actual = upload(alice, case.fmt, case.data)
# compare the resulting cap to the expected result
expected = vectors.capabilities["vector"][case.key]
expected = vectors.capabilities[case]
assert actual == expected
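
Indexing vectors.capabilities directly with the case works because attrs @frozen classes get value-based __eq__ and __hash__, so structurally equal cases are interchangeable dict keys; a minimal standalone sketch (the Example class is hypothetical):

from attrs import frozen

@frozen
class Example:
    fmt: str
    length: int

table = {Example("chk", 1024): "URI:CHK:..."}
# a structurally equal instance finds the same entry
assert table[Example("chk", 1024)] == "URI:CHK:..."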
@ensureDeferred
async def skiptest_generate(reactor, request, alice):
async def test_generate(reactor, request, alice):
"""
This is a helper for generating the test vectors.
@@ -141,27 +121,34 @@ async def skiptest_generate(reactor, request, alice):
    to run against the results produced originally, not a possibly
    ever-changing set of outputs.
    """
    space = product(
        range(len(ZFEC_PARAMS)),
        range(len(CONVERGENCE_SECRETS)),
        range(len(OBJECT_DESCRIPTIONS)),
        range(len(FORMATS)),
    )
    results = await asyncfoldr(
        generate(reactor, request, alice, space),
        insert,
        {},
    )
    space = starmap(vectors.Case, product(
        ZFEC_PARAMS,
        CONVERGENCE_SECRETS,
        OBJECT_DESCRIPTIONS,
        FORMATS,
    ))
    results = generate(reactor, request, alice, space)
    with vectors.DATA_PATH.open("w") as f:
        f.write(safe_dump({
            "version": "2022-12-26",
            "params": {
                "zfec": ZFEC_PARAMS,
                "convergence": CONVERGENCE_SECRETS,
                "objects": OBJECT_DESCRIPTIONS,
                "formats": FORMATS,
            },
            "vector": results,
            "version": "2023-01-03",
            "vector": [
                {
                    "convergence": vectors.encode_bytes(case.convergence),
                    "format": case.fmt,
                    "sample": {
                        "seed": vectors.encode_bytes(case.seed_data.seed),
                        "length": case.seed_data.length,
                    },
                    "zfec": {
                        "segmentSize": SEGMENT_SIZE,
                        "required": case.seed_params.required,
                        "total": case.seed_params.total,
                    },
                    "expected": cap,
                }
                async for (case, cap)
                in results
            ],
        }))
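
Since vectors.load_capabilities (in the second file below) parses exactly this layout, a hedged sanity check of the round trip might look like this sketch (the expected capability here is a placeholder, not a real vector):

from io import StringIO

record = {
    "convergence": vectors.encode_bytes(digest(b"Hello world")[:16]),
    "format": "chk",
    "sample": {"seed": vectors.encode_bytes(b"a"), "length": 1024},
    "zfec": {"segmentSize": SEGMENT_SIZE, "required": 1, "total": 1},
    "expected": "URI:CHK:...",  # placeholder capability string
}
loaded = vectors.load_capabilities(StringIO(safe_dump({"vector": [record]})))
assert list(loaded.values()) == ["URI:CHK:..."]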
@@ -169,8 +156,8 @@ async def generate(
    reactor,
    request,
    alice: TahoeProcess,
    space: Iterator[int, int, int, int],
) -> AsyncGenerator[tuple[str, str], None]:
    cases: Iterator[vectors.Case],
) -> AsyncGenerator[tuple[vectors.Case, str], None]:
"""
Generate all of the test vectors using the given node.
@@ -184,79 +171,21 @@ async def generate(
    :param alice: The Tahoe-LAFS node to use to generate the test vectors.

    :param space: An iterator of coordinates in the test vector space for
        which to generate values. The elements of each tuple give indexes into
        ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, and FORMATS.
    :param cases: The cases for which to generate values.

    :return: The yield values are two-tuples describing a test vector. The
        first element is a string describing a case and the second element is
        the capability for that case.
    :return: The yield values are two-tuples of a case and the capability
        generated for it.
    """
    # Share placement doesn't affect the resulting capability. For maximum
    # reliability of this generator, be happy if we can put shares anywhere
    happy = 1

    node_key = (None, None)
    for params_idx, secret_idx, data_idx, fmt_idx in space:
        case = load_case(params_idx, secret_idx, data_idx, fmt_idx)
        if node_key != (case.params, case.convergence):
            await reconfigure(reactor, request, alice, (happy,) + case.params, case.convergence)
            node_key = (case.params, case.convergence)
    for case in cases:
        await reconfigure(
            reactor,
            request,
            alice,
            (happy, case.params.required, case.params.total),
            case.convergence
        )

        cap = upload(alice, case.fmt, case.data)
        yield case.key, cap
def key(params: int, secret: int, data: int, fmt: int) -> str:
    """
    Construct the key describing the case defined by the given parameters.

    The parameters are indexes into the test data for a certain case.

    :return: A distinct string for the given inputs.
    """
    return f"{params}-{secret}-{data}-{fmt}"


def stretch(seed: bytes, size: int) -> bytes:
    """
    Given a simple description of a byte string, return the byte string
    itself.
    """
    assert isinstance(seed, bytes)
    assert isinstance(size, int)
    assert size > 0
    assert len(seed) > 0

    multiples = size // len(seed) + 1
    return (seed * multiples)[:size]
def load_case(
    params_idx: int,
    convergence_idx: int,
    data_idx: int,
    fmt_idx: int
) -> Case:
    """
    :return:
    """
    params = ZFEC_PARAMS[params_idx]
    fmt = FORMATS[fmt_idx]
    convergence = CONVERGENCE_SECRETS[convergence_idx]
    data = stretch(*OBJECT_DESCRIPTIONS[data_idx])
    if params[1] == MAX_SHARES:
        params = (params[0], MAX_SHARES_MAP[fmt])
    k = key(params_idx, convergence_idx, data_idx, fmt_idx)
    return Case(k, fmt, params, convergence, data)
@frozen
class Case:
    """
    Represent one case for which we want/have a test vector.
    """
    key: str
    fmt: str
    params: tuple[int, int]
    convergence: bytes
    data: bytes
        yield case, cap
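
The asynchronous list comprehension in test_generate consumes this generator; the equivalent explicit loop (illustrative only) would be:

# inside an async function:
results = []
async for case, cap in generate(reactor, request, alice, space):
    results.append((case, cap))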

File diff suppressed because it is too large

View File

@@ -1,18 +1,144 @@
"""
A module that loads pre-generated test vectors.
:ivar CHK_PATH: The path of the file containing CHK test vectors.
:ivar DATA_PATH: The path of the file containing test vectors.
:ivar chk: The CHK test vectors.
:ivar capabilities: The CHK test vectors.
"""
from __future__ import annotations
from typing import TextIO
from attrs import frozen
from yaml import safe_load
from pathlib import Path
from base64 import b64encode, b64decode
DATA_PATH: Path = Path(__file__).parent / "test_vectors.yaml"
@frozen
class Sample:
    """
    Some instructions for building a long byte string.

    :ivar seed: Some bytes to repeat some times to produce the string.
    :ivar length: The length of the desired byte string.
    """
    seed: bytes
    length: int
@frozen
class Param:
    """
    Some ZFEC parameters.
    """
    required: int
    total: int
# CHK has a max of 256 shares. SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"

# SDMF and MDMF encode share counts (N and k) into the share itself as an
# unsigned byte. They could have encoded (share count - 1) to fit the full
# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
# inaccessible to those formats and we set the upper bound at 255.
MAX_SHARES_MAP = {
    "chk": 256,
    "sdmf": 255,
    "mdmf": 255,
}
@frozen
class SeedParam:
    """
    Some ZFEC parameters, almost.

    :ivar required: The number of required shares.

    :ivar total: Either the number of total shares or the constant
        ``MAX_SHARES`` to indicate that the total number of shares should be
        the maximum number supported by the object format.
    """
    required: int
    total: int | str

    def realize(self, max_total: int) -> Param:
        """
        Create a ``Param`` from this object's values, possibly substituting
        the given real value for total if necessary.

        :param max_total: The value to use to replace ``MAX_SHARES`` if
            necessary.
        """
        if self.total == MAX_SHARES:
            return Param(self.required, max_total)
        return Param(self.required, self.total)
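
For example (an illustrative sketch, not part of the module), resolving the symbolic maximum per format:

assert SeedParam(101, MAX_SHARES).realize(MAX_SHARES_MAP["chk"]) == Param(101, 256)
assert SeedParam(101, MAX_SHARES).realize(MAX_SHARES_MAP["sdmf"]) == Param(101, 255)
# concrete totals pass through unchanged
assert SeedParam(3, 10).realize(MAX_SHARES_MAP["chk"]) == Param(3, 10)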
@frozen
class Case:
    """
    Represent one case for which we want/have a test vector.
    """
    seed_params: SeedParam
    convergence: bytes
    seed_data: Sample
    fmt: str

    @property
    def data(self) -> bytes:
        """
        The byte string this case uploads, built from ``seed_data``.
        """
        return stretch(self.seed_data.seed, self.seed_data.length)

    @property
    def params(self) -> Param:
        """
        The concrete ZFEC parameters, with ``MAX_SHARES`` resolved for this
        case's format.
        """
        return self.seed_params.realize(MAX_SHARES_MAP[self.fmt])
def encode_bytes(b: bytes) -> str:
    """
    Base64 encode some bytes to text so they are representable in JSON.
    """
    return b64encode(b).decode("ascii")


def decode_bytes(b: str) -> bytes:
    """
    Base64 decode some text to bytes.
    """
    return b64decode(b.encode("ascii"))
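
These two are inverses; for instance:

assert encode_bytes(b"\x00\xff") == "AP8="
assert decode_bytes(encode_bytes(b"\x00\xff")) == b"\x00\xff"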
def stretch(seed: bytes, size: int) -> bytes:
    """
    Given a simple description of a byte string, return the byte string
    itself.
    """
    assert isinstance(seed, bytes)
    assert isinstance(size, int)
    assert size > 0
    assert len(seed) > 0

    multiples = size // len(seed) + 1
    return (seed * multiples)[:size]
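
For example, a two-byte seed repeated and truncated to five bytes:

assert stretch(b"ab", 5) == b"ababa"
assert stretch(b"a", 3) == b"aaa"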
def load_capabilities(f: TextIO) -> dict[Case, str]:
    """
    Load the pre-generated test vectors from the given file.

    :return: A mapping from each case to the capability expected for it.
    """
    data = safe_load(f)
    return {
        Case(
            seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
            convergence=decode_bytes(case["convergence"]),
            seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
            fmt=case["format"],
        ): case["expected"]
        for case
        in data["vector"]
    }
try:
    with DATA_PATH.open() as f:
        capabilities: dict[str, str] = safe_load(f)
        capabilities: dict[Case, str] = load_capabilities(f)
except FileNotFoundError:
    capabilities = {}