Generate and consume the new structure properly

This commit is contained in:
Jean-Paul Calderone 2023-01-03 19:22:38 -05:00
parent ca00adf2b4
commit fb70ba1867
3 changed files with 203 additions and 2068 deletions

View File

@@ -4,28 +4,30 @@ Verify certain results against test vectors with well-known results.
 from __future__ import annotations
-from time import sleep
 from typing import AsyncGenerator, Iterator
 from hashlib import sha256
-from itertools import product
+from itertools import starmap, product
 from yaml import safe_dump
-from attrs import frozen
 from pytest import mark
 from pytest_twisted import ensureDeferred
 from . import vectors
-from .util import reconfigure, upload, asyncfoldr, insert, TahoeProcess
+from .util import reconfigure, upload, TahoeProcess
 def digest(bs: bytes) -> bytes:
+    """
+    Digest bytes to bytes.
+    """
     return sha256(bs).digest()
 def hexdigest(bs: bytes) -> str:
+    """
+    Digest bytes to text.
+    """
     return sha256(bs).hexdigest()
 # Just a couple convergence secrets. The only thing we do with this value is
 # feed it into a tagged hash. It certainly makes a difference to the output
 # but the hash should destroy any structure in the input so it doesn't seem
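As a standalone illustration of the invariant that test_convergence (below) checks, independent of the test suite and using only the stdlib: SHA-256 digests are 32 bytes, so truncating one to its first 16 bytes yields a value of exactly the length expected for a convergence secret.

    from hashlib import sha256

    def digest(bs: bytes) -> bytes:
        # Same helper as above: hash bytes to a 32-byte digest.
        return sha256(bs).digest()

    # Truncate to 16 bytes, as the CONVERGENCE_SECRETS entries do.
    secret = digest(b"Hello world")[:16]
    assert isinstance(secret, bytes)
    assert len(secret) == 16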
@@ -35,7 +37,6 @@ CONVERGENCE_SECRETS = [
     digest(b"Hello world")[:16],
 ]
 # Exercise at least a handful of different sizes, trying to cover:
 #
 # 1. Some cases smaller than one "segment" (128k).
@@ -51,87 +52,66 @@ CONVERGENCE_SECRETS = [
 SEGMENT_SIZE = 128 * 1024
 OBJECT_DESCRIPTIONS = [
-    (b"a", 1024),
-    (b"c", 4096),
-    (digest(b"foo"), SEGMENT_SIZE - 1),
-    (digest(b"bar"), SEGMENT_SIZE + 1),
-    (digest(b"baz"), SEGMENT_SIZE * 16 - 1),
-    (digest(b"quux"), SEGMENT_SIZE * 16 + 1),
-    (digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
-    (digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
+    vectors.Sample(b"a", 1024),
+    vectors.Sample(b"c", 4096),
+    vectors.Sample(digest(b"foo"), SEGMENT_SIZE - 1),
+    vectors.Sample(digest(b"bar"), SEGMENT_SIZE + 1),
+    vectors.Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
+    vectors.Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
+    vectors.Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
+    vectors.Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
 ]
-# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
-# Represent max symbolically and resolve it when we know what format we're
-# dealing with.
-MAX_SHARES = "max"
-# SDMF and MDMF encode share counts (N and k) into the share itself as an
-# unsigned byte. They could have encoded (share count - 1) to fit the full
-# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
-# inaccessible to those formats and we set the upper bound at 255.
-MAX_SHARES_MAP = {
-    "chk": 256,
-    "sdmf": 255,
-    "mdmf": 255,
-}
 ZFEC_PARAMS = [
-    (1, 1),
-    (1, 3),
-    (2, 3),
-    (3, 10),
-    (71, 255),
-    (101, MAX_SHARES),
+    vectors.SeedParam(1, 1),
+    vectors.SeedParam(1, 3),
+    vectors.SeedParam(2, 3),
+    vectors.SeedParam(3, 10),
+    vectors.SeedParam(71, 255),
+    vectors.SeedParam(101, vectors.MAX_SHARES),
 ]
 FORMATS = [
     "chk",
-    "sdmf",
-    "mdmf",
+    # "sdmf",
+    # "mdmf",
 ]
-@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
-def test_convergence(convergence_idx):
+@mark.parametrize('convergence', CONVERGENCE_SECRETS)
+def test_convergence(convergence):
     """
     Convergence secrets are 16 bytes.
     """
-    convergence = CONVERGENCE_SECRETS[convergence_idx]
     assert isinstance(convergence, bytes), "Convergence secret must be bytes"
     assert len(convergence) == 16, "Convergence secret must by 16 bytes"
-@mark.parametrize('params_idx', range(len(ZFEC_PARAMS)))
-@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
-@mark.parametrize('data_idx', range(len(OBJECT_DESCRIPTIONS)))
-@mark.parametrize('fmt_idx', range(len(FORMATS)))
+@mark.parametrize('seed_params', ZFEC_PARAMS)
+@mark.parametrize('convergence', CONVERGENCE_SECRETS)
+@mark.parametrize('seed_data', OBJECT_DESCRIPTIONS)
+@mark.parametrize('fmt', FORMATS)
 @ensureDeferred
-async def test_capability(reactor, request, alice, params_idx, convergence_idx, data_idx, fmt_idx):
+async def test_capability(reactor, request, alice, seed_params, convergence, seed_data, fmt):
     """
     The capability that results from uploading certain well-known data
     with certain well-known parameters results in exactly the previously
     computed value.
     """
-    case = load_case(
-        params_idx,
-        convergence_idx,
-        data_idx,
-        fmt_idx,
-    )
+    case = vectors.Case(seed_params, convergence, seed_data, fmt)
     # rewrite alice's config to match params and convergence
-    await reconfigure(reactor, request, alice, (1,) + case.params, case.convergence)
+    await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)
     # upload data in the correct format
     actual = upload(alice, case.fmt, case.data)
     # compare the resulting cap to the expected result
-    expected = vectors.capabilities["vector"][case.key]
+    expected = vectors.capabilities[case]
     assert actual == expected
 @ensureDeferred
-async def skiptest_generate(reactor, request, alice):
+async def test_generate(reactor, request, alice):
     """
     This is a helper for generating the test vectors.
@@ -141,27 +121,34 @@ async def skiptest_generate(reactor, request, alice):
     to run against the results produced originally, not a possibly
     ever-changing set of outputs.
     """
-    space = product(
-        range(len(ZFEC_PARAMS)),
-        range(len(CONVERGENCE_SECRETS)),
-        range(len(OBJECT_DESCRIPTIONS)),
-        range(len(FORMATS)),
-    )
-    results = await asyncfoldr(
-        generate(reactor, request, alice, space),
-        insert,
-        {},
-    )
+    space = starmap(vectors.Case, product(
+        ZFEC_PARAMS,
+        CONVERGENCE_SECRETS,
+        OBJECT_DESCRIPTIONS,
+        FORMATS,
+    ))
+    results = generate(reactor, request, alice, space)
     with vectors.DATA_PATH.open("w") as f:
         f.write(safe_dump({
-            "version": "2022-12-26",
-            "params": {
-                "zfec": ZFEC_PARAMS,
-                "convergence": CONVERGENCE_SECRETS,
-                "objects": OBJECT_DESCRIPTIONS,
-                "formats": FORMATS,
-            },
-            "vector": results,
+            "version": "2023-01-03",
+            "vector": [
+                {
+                    "convergence": vectors.encode_bytes(case.convergence),
+                    "format": case.fmt,
+                    "sample": {
+                        "seed": vectors.encode_bytes(case.seed_data.seed),
+                        "length": case.seed_data.length,
+                    },
+                    "zfec": {
+                        "segmentSize": SEGMENT_SIZE,
+                        "required": case.seed_params.required,
+                        "total": case.seed_params.total,
+                    },
+                    "expected": cap,
+                }
+                async for (case, cap)
+                in results
+            ],
         }))
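The safe_dump() call above drains generate(), an async generator, with an async list comprehension, so each (case, cap) pair is awaited as the "vector" list is built. A minimal sketch of the same pattern, with hypothetical pairs() and collect() as stand-ins for generate() and the dump:

    from typing import AsyncGenerator

    async def pairs() -> AsyncGenerator[tuple[str, str], None]:
        # Stand-in for generate(): yield (case, capability) pairs one at a time.
        for n in range(3):
            yield (f"case-{n}", f"cap-{n}")

    async def collect() -> list[dict[str, str]]:
        # The async comprehension awaits each yielded pair while building the
        # list, just as the "vector" list above is built inside safe_dump().
        return [{"case": case, "expected": cap} async for (case, cap) in pairs()]

    # Run with e.g. asyncio.run(collect()); the integration tests instead run
    # under Twisted's reactor via pytest-twisted.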
@@ -169,8 +156,8 @@ async def generate(
     reactor,
     request,
     alice: TahoeProcess,
-    space: Iterator[int, int, int, int],
-) -> AsyncGenerator[tuple[str, str], None]:
+    cases: Iterator[vectors.Case],
+) -> AsyncGenerator[[vectors.Case, str], None]:
     """
     Generate all of the test vectors using the given node.
@@ -184,79 +171,21 @@ async def generate(
     :param alice: The Tahoe-LAFS node to use to generate the test vectors.
-    :param space: An iterator of coordinates in the test vector space for
-        which to generate values. The elements of each tuple give indexes into
-        ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, and FORMATS.
-    :return: The yield values are two-tuples describing a test vector. The
-        first element is a string describing a case and the second element is
-        the capability for that case.
+    :param case: The inputs for which to generate a value.
+    :return: The capability for the case.
     """
     # Share placement doesn't affect the resulting capability. For maximum
     # reliability of this generator, be happy if we can put shares anywhere
     happy = 1
-    node_key = (None, None)
-    for params_idx, secret_idx, data_idx, fmt_idx in space:
-        case = load_case(params_idx, secret_idx, data_idx, fmt_idx)
-        if node_key != (case.params, case.convergence):
-            await reconfigure(reactor, request, alice, (happy,) + case.params, case.convergence)
-            node_key = (case.params, case.convergence)
+    for case in cases:
+        await reconfigure(
+            reactor,
+            request,
+            alice,
+            (happy, case.params.required, case.params.total),
+            case.convergence
+        )
         cap = upload(alice, case.fmt, case.data)
-        yield case.key, cap
+        yield case, cap
-def key(params: int, secret: int, data: int, fmt: int) -> str:
-    """
-    Construct the key describing the case defined by the given parameters.
-    The parameters are indexes into the test data for a certain case.
-    :return: A distinct string for the given inputs.
-    """
-    return f"{params}-{secret}-{data}-{fmt}"
-def stretch(seed: bytes, size: int) -> bytes:
-    """
-    Given a simple description of a byte string, return the byte string
-    itself.
-    """
-    assert isinstance(seed, bytes)
-    assert isinstance(size, int)
-    assert size > 0
-    assert len(seed) > 0
-    multiples = size // len(seed) + 1
-    return (seed * multiples)[:size]
-def load_case(
-    params_idx: int,
-    convergence_idx: int,
-    data_idx: int,
-    fmt_idx: int
-) -> Case:
-    """
-    :return:
-    """
-    params = ZFEC_PARAMS[params_idx]
-    fmt = FORMATS[fmt_idx]
-    convergence = CONVERGENCE_SECRETS[convergence_idx]
-    data = stretch(*OBJECT_DESCRIPTIONS[data_idx])
-    if params[1] == MAX_SHARES:
-        params = (params[0], MAX_SHARES_MAP[fmt])
-    k = key(params_idx, convergence_idx, data_idx, fmt_idx)
-    return Case(k, fmt, params, convergence, data)
-@frozen
-class Case:
-    """
-    Represent one case for which we want/have a test vector.
-    """
-    key: str
-    fmt: str
-    params: tuple[int, int]
-    convergence: bytes
-    data: bytes
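The deleted key()/load_case() helpers existed only to translate parametrized indexes back into values. Because frozen attrs classes are hashable and compare by value, the objects themselves can be handed to mark.parametrize, which is what the new decorators above do. A trimmed-down sketch of that pattern (this SeedParam is a simplified stand-in, not the real class):

    from attrs import frozen
    from pytest import mark

    @frozen
    class SeedParam:
        required: int
        total: int

    PARAMS = [SeedParam(1, 1), SeedParam(2, 3), SeedParam(3, 10)]

    # Parametrizing over the objects (rather than range(len(PARAMS))) lets the
    # test body use the values directly, with no index lookups to keep in sync.
    @mark.parametrize("seed_params", PARAMS)
    def test_required_not_more_than_total(seed_params):
        assert seed_params.required <= seed_params.total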

File diff suppressed because it is too large

View File

@@ -1,18 +1,144 @@
 """
 A module that loads pre-generated test vectors.
-:ivar CHK_PATH: The path of the file containing CHK test vectors.
-:ivar chk: The CHK test vectors.
+:ivar DATA_PATH: The path of the file containing test vectors.
+:ivar capabilities: The CHK test vectors.
 """
+from __future__ import annotations
+from typing import TextIO
+from attrs import frozen
 from yaml import safe_load
 from pathlib import Path
+from base64 import b64encode, b64decode
 DATA_PATH: Path = Path(__file__).parent / "test_vectors.yaml"
+@frozen
+class Sample:
+    """
+    Some instructions for building a long byte string.
+    :ivar seed: Some bytes to repeat some times to produce the string.
+    :ivar length: The length of the desired byte string.
+    """
+    seed: bytes
+    length: int
+@frozen
+class Param:
+    """
+    Some ZFEC parameters.
+    """
+    required: int
+    total: int
+# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
+# Represent max symbolically and resolve it when we know what format we're
+# dealing with.
+MAX_SHARES = "max"
+# SDMF and MDMF encode share counts (N and k) into the share itself as an
+# unsigned byte. They could have encoded (share count - 1) to fit the full
+# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
+# inaccessible to those formats and we set the upper bound at 255.
+MAX_SHARES_MAP = {
+    "chk": 256,
+    "sdmf": 255,
+    "mdmf": 255,
+}
+@frozen
+class SeedParam:
+    """
+    Some ZFEC parameters, almost.
+    :ivar required: The number of required shares.
+    :ivar total: Either the number of total shares or the constant
+        ``MAX_SHARES`` to indicate that the total number of shares should be
+        the maximum number supported by the object format.
+    """
+    required: int
+    total: int | str
+    def realize(self, max_total: int) -> Param:
+        """
+        Create a ``Param`` from this object's values, possibly
+        substituting the given real value for total if necessary.
+        :param max_total: The value to use to replace ``MAX_SHARES`` if
+            necessary.
+        """
+        if self.total == MAX_SHARES:
+            return Param(self.required, max_total)
+        return Param(self.required, self.total)
+@frozen
+class Case:
+    """
+    Represent one case for which we want/have a test vector.
+    """
+    seed_params: Param
+    convergence: bytes
+    seed_data: Sample
+    fmt: str
+    @property
+    def data(self):
+        return stretch(self.seed_data.seed, self.seed_data.length)
+    @property
+    def params(self):
+        return self.seed_params.realize(MAX_SHARES_MAP[self.fmt])
+def encode_bytes(b: bytes) -> str:
+    """
+    Base64 encode some bytes to text so they are representable in JSON.
+    """
+    return b64encode(b).decode("ascii")
+def decode_bytes(b: str) -> bytes:
+    """
+    Base64 decode some text to bytes.
+    """
+    return b64decode(b.encode("ascii"))
+def stretch(seed: bytes, size: int) -> bytes:
+    """
+    Given a simple description of a byte string, return the byte string
+    itself.
+    """
+    assert isinstance(seed, bytes)
+    assert isinstance(size, int)
+    assert size > 0
+    assert len(seed) > 0
+    multiples = size // len(seed) + 1
+    return (seed * multiples)[:size]
+def load_capabilities(f: TextIO) -> dict[Case, str]:
+    data = safe_load(f)
+    return {
+        Case(
+            seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
+            convergence=decode_bytes(case["convergence"]),
+            seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
+            fmt=case["format"],
+        ): case["expected"]
+        for case
+        in data["vector"]
+    }
 try:
     with DATA_PATH.open() as f:
-        capabilities: dict[str, str] = safe_load(f)
+        capabilities: dict[Case, str] = load_capabilities(f)
 except FileNotFoundError:
     capabilities = {}
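A sketch of how these pieces compose, assuming the module is importable as integration.vectors (that import path is an assumption): stretch() expands a Sample into the actual test data, SeedParam.realize() resolves the symbolic MAX_SHARES per object format, and since Case is frozen (hashable, compared by value) a freshly constructed Case can key directly into capabilities.

    from integration import vectors  # assumed import path

    # A Sample describes data without storing it; stretch() materializes it.
    sample = vectors.Sample(seed=b"ab", length=5)
    assert vectors.stretch(sample.seed, sample.length) == b"ababa"

    # MAX_SHARES resolves per format: 256 for CHK, 255 for SDMF/MDMF.
    seed_params = vectors.SeedParam(101, vectors.MAX_SHARES)
    assert seed_params.realize(vectors.MAX_SHARES_MAP["chk"]) == vectors.Param(101, 256)

    # Bytes round-trip through the base64 text encoding used in the YAML file.
    assert vectors.decode_bytes(vectors.encode_bytes(b"\x01\x02")) == b"\x01\x02"

    # A value-equal Case keys into the loaded capabilities mapping.
    case = vectors.Case(seed_params, b"\x00" * 16, sample, "chk")
    expected = vectors.capabilities.get(case)  # None unless this case was generated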