mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-21 13:57:51 +00:00
Generate and consume the new structure properly
This commit is contained in:
parent
ca00adf2b4
commit
fb70ba1867
@ -4,28 +4,30 @@ Verify certain results against test vectors with well-known results.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from time import sleep
|
||||
from typing import AsyncGenerator, Iterator
|
||||
from hashlib import sha256
|
||||
from itertools import product
|
||||
from itertools import starmap, product
|
||||
from yaml import safe_dump
|
||||
|
||||
from attrs import frozen
|
||||
|
||||
from pytest import mark
|
||||
from pytest_twisted import ensureDeferred
|
||||
|
||||
from . import vectors
|
||||
from .util import reconfigure, upload, asyncfoldr, insert, TahoeProcess
|
||||
from .util import reconfigure, upload, TahoeProcess
|
||||
|
||||
def digest(bs: bytes) -> bytes:
    """
    Digest bytes to bytes.

    :param bs: The bytes to hash.

    :return: The raw SHA-256 digest of ``bs``.
    """
    return sha256(bs).digest()
|
||||
|
||||
|
||||
def hexdigest(bs: bytes) -> str:
    """
    Digest bytes to text.

    :param bs: The bytes to hash.

    :return: The SHA-256 digest of ``bs`` as a hexadecimal string.
    """
    return sha256(bs).hexdigest()
|
||||
|
||||
|
||||
# Just a couple convergence secrets. The only thing we do with this value is
|
||||
# feed it into a tagged hash. It certainly makes a difference to the output
|
||||
# but the hash should destroy any structure in the input so it doesn't seem
|
||||
@ -35,7 +37,6 @@ CONVERGENCE_SECRETS = [
|
||||
digest(b"Hello world")[:16],
|
||||
]
|
||||
|
||||
|
||||
# Exercise at least a handful of different sizes, trying to cover:
|
||||
#
|
||||
# 1. Some cases smaller than one "segment" (128k).
|
||||
@ -51,87 +52,66 @@ CONVERGENCE_SECRETS = [
|
||||
|
||||
SEGMENT_SIZE = 128 * 1024
|
||||
OBJECT_DESCRIPTIONS = [
|
||||
(b"a", 1024),
|
||||
(b"c", 4096),
|
||||
(digest(b"foo"), SEGMENT_SIZE - 1),
|
||||
(digest(b"bar"), SEGMENT_SIZE + 1),
|
||||
(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
|
||||
(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
|
||||
(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
|
||||
(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
|
||||
vectors.Sample(b"a", 1024),
|
||||
vectors.Sample(b"c", 4096),
|
||||
vectors.Sample(digest(b"foo"), SEGMENT_SIZE - 1),
|
||||
vectors.Sample(digest(b"bar"), SEGMENT_SIZE + 1),
|
||||
vectors.Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
|
||||
vectors.Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
|
||||
vectors.Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
|
||||
vectors.Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
|
||||
]
|
||||
|
||||
# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
|
||||
# Represent max symbolically and resolve it when we know what format we're
|
||||
# dealing with.
|
||||
MAX_SHARES = "max"
|
||||
|
||||
# SDMF and MDMF encode share counts (N and k) into the share itself as an
|
||||
# unsigned byte. They could have encoded (share count - 1) to fit the full
|
||||
# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
|
||||
# inaccessible to those formats and we set the upper bound at 255.
|
||||
MAX_SHARES_MAP = {
|
||||
"chk": 256,
|
||||
"sdmf": 255,
|
||||
"mdmf": 255,
|
||||
}
|
||||
|
||||
ZFEC_PARAMS = [
|
||||
(1, 1),
|
||||
(1, 3),
|
||||
(2, 3),
|
||||
(3, 10),
|
||||
(71, 255),
|
||||
(101, MAX_SHARES),
|
||||
vectors.SeedParam(1, 1),
|
||||
vectors.SeedParam(1, 3),
|
||||
vectors.SeedParam(2, 3),
|
||||
vectors.SeedParam(3, 10),
|
||||
vectors.SeedParam(71, 255),
|
||||
vectors.SeedParam(101, vectors.MAX_SHARES),
|
||||
]
|
||||
|
||||
FORMATS = [
|
||||
"chk",
|
||||
"sdmf",
|
||||
"mdmf",
|
||||
# "sdmf",
|
||||
# "mdmf",
|
||||
]
|
||||
|
||||
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
|
||||
def test_convergence(convergence_idx):
|
||||
@mark.parametrize('convergence', CONVERGENCE_SECRETS)
|
||||
def test_convergence(convergence):
|
||||
"""
|
||||
Convergence secrets are 16 bytes.
|
||||
"""
|
||||
convergence = CONVERGENCE_SECRETS[convergence_idx]
|
||||
assert isinstance(convergence, bytes), "Convergence secret must be bytes"
|
||||
assert len(convergence) == 16, "Convergence secret must by 16 bytes"
|
||||
|
||||
|
||||
@mark.parametrize('params_idx', range(len(ZFEC_PARAMS)))
|
||||
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
|
||||
@mark.parametrize('data_idx', range(len(OBJECT_DESCRIPTIONS)))
|
||||
@mark.parametrize('fmt_idx', range(len(FORMATS)))
|
||||
@mark.parametrize('seed_params', ZFEC_PARAMS)
|
||||
@mark.parametrize('convergence', CONVERGENCE_SECRETS)
|
||||
@mark.parametrize('seed_data', OBJECT_DESCRIPTIONS)
|
||||
@mark.parametrize('fmt', FORMATS)
|
||||
@ensureDeferred
|
||||
async def test_capability(reactor, request, alice, params_idx, convergence_idx, data_idx, fmt_idx):
|
||||
async def test_capability(reactor, request, alice, seed_params, convergence, seed_data, fmt):
|
||||
"""
|
||||
The capability that results from uploading certain well-known data
|
||||
with certain well-known parameters results in exactly the previously
|
||||
computed value.
|
||||
"""
|
||||
case = load_case(
|
||||
params_idx,
|
||||
convergence_idx,
|
||||
data_idx,
|
||||
fmt_idx,
|
||||
)
|
||||
case = vectors.Case(seed_params, convergence, seed_data, fmt)
|
||||
|
||||
# rewrite alice's config to match params and convergence
|
||||
await reconfigure(reactor, request, alice, (1,) + case.params, case.convergence)
|
||||
await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)
|
||||
|
||||
# upload data in the correct format
|
||||
actual = upload(alice, case.fmt, case.data)
|
||||
|
||||
# compare the resulting cap to the expected result
|
||||
expected = vectors.capabilities["vector"][case.key]
|
||||
expected = vectors.capabilities[case]
|
||||
assert actual == expected
|
||||
|
||||
|
||||
@ensureDeferred
|
||||
async def skiptest_generate(reactor, request, alice):
|
||||
async def test_generate(reactor, request, alice):
|
||||
"""
|
||||
This is a helper for generating the test vectors.
|
||||
|
||||
@ -141,27 +121,34 @@ async def skiptest_generate(reactor, request, alice):
|
||||
to run against the results produced originally, not a possibly
|
||||
ever-changing set of outputs.
|
||||
"""
|
||||
space = product(
|
||||
range(len(ZFEC_PARAMS)),
|
||||
range(len(CONVERGENCE_SECRETS)),
|
||||
range(len(OBJECT_DESCRIPTIONS)),
|
||||
range(len(FORMATS)),
|
||||
)
|
||||
results = await asyncfoldr(
|
||||
generate(reactor, request, alice, space),
|
||||
insert,
|
||||
{},
|
||||
)
|
||||
space = starmap(vectors.Case, product(
|
||||
ZFEC_PARAMS,
|
||||
CONVERGENCE_SECRETS,
|
||||
OBJECT_DESCRIPTIONS,
|
||||
FORMATS,
|
||||
))
|
||||
results = generate(reactor, request, alice, space)
|
||||
with vectors.DATA_PATH.open("w") as f:
|
||||
f.write(safe_dump({
|
||||
"version": "2022-12-26",
|
||||
"params": {
|
||||
"zfec": ZFEC_PARAMS,
|
||||
"convergence": CONVERGENCE_SECRETS,
|
||||
"objects": OBJECT_DESCRIPTIONS,
|
||||
"formats": FORMATS,
|
||||
},
|
||||
"vector": results,
|
||||
"version": "2023-01-03",
|
||||
"vector": [
|
||||
{
|
||||
"convergence": vectors.encode_bytes(case.convergence),
|
||||
"format": case.fmt,
|
||||
"sample": {
|
||||
"seed": vectors.encode_bytes(case.seed_data.seed),
|
||||
"length": case.seed_data.length,
|
||||
},
|
||||
"zfec": {
|
||||
"segmentSize": SEGMENT_SIZE,
|
||||
"required": case.seed_params.required,
|
||||
"total": case.seed_params.total,
|
||||
},
|
||||
"expected": cap,
|
||||
}
|
||||
async for (case, cap)
|
||||
in results
|
||||
],
|
||||
}))
|
||||
|
||||
|
||||
@ -169,8 +156,8 @@ async def generate(
|
||||
reactor,
|
||||
request,
|
||||
alice: TahoeProcess,
|
||||
space: Iterator[int, int, int, int],
|
||||
) -> AsyncGenerator[tuple[str, str], None]:
|
||||
cases: Iterator[vectors.Case],
|
||||
) -> AsyncGenerator[[vectors.Case, str], None]:
|
||||
"""
|
||||
Generate all of the test vectors using the given node.
|
||||
|
||||
@ -184,79 +171,21 @@ async def generate(
|
||||
|
||||
:param alice: The Tahoe-LAFS node to use to generate the test vectors.
|
||||
|
||||
:param space: An iterator of coordinates in the test vector space for
|
||||
which to generate values. The elements of each tuple give indexes into
|
||||
ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, and FORMATS.
|
||||
:param cases: An iterator of the cases for which to generate values.
|
||||
|
||||
:return: The yield values are two-tuples describing a test vector. The
|
||||
first element is a string describing a case and the second element is
|
||||
the capability for that case.
|
||||
:return: The yield values are two-tuples. The first element is a case and the second element is the capability for that case.
|
||||
"""
|
||||
# Share placement doesn't affect the resulting capability. For maximum
|
||||
# reliability of this generator, be happy if we can put shares anywhere
|
||||
happy = 1
|
||||
node_key = (None, None)
|
||||
for params_idx, secret_idx, data_idx, fmt_idx in space:
|
||||
case = load_case(params_idx, secret_idx, data_idx, fmt_idx)
|
||||
if node_key != (case.params, case.convergence):
|
||||
await reconfigure(reactor, request, alice, (happy,) + case.params, case.convergence)
|
||||
node_key = (case.params, case.convergence)
|
||||
for case in cases:
|
||||
await reconfigure(
|
||||
reactor,
|
||||
request,
|
||||
alice,
|
||||
(happy, case.params.required, case.params.total),
|
||||
case.convergence
|
||||
)
|
||||
|
||||
cap = upload(alice, case.fmt, case.data)
|
||||
yield case.key, cap
|
||||
|
||||
|
||||
def key(params: int, secret: int, data: int, fmt: int) -> str:
|
||||
"""
|
||||
Construct the key describing the case defined by the given parameters.
|
||||
|
||||
The parameters are indexes into the test data for a certain case.
|
||||
|
||||
:return: A distinct string for the given inputs.
|
||||
"""
|
||||
return f"{params}-{secret}-{data}-{fmt}"
|
||||
|
||||
|
||||
def stretch(seed: bytes, size: int) -> bytes:
|
||||
"""
|
||||
Given a simple description of a byte string, return the byte string
|
||||
itself.
|
||||
"""
|
||||
assert isinstance(seed, bytes)
|
||||
assert isinstance(size, int)
|
||||
assert size > 0
|
||||
assert len(seed) > 0
|
||||
|
||||
multiples = size // len(seed) + 1
|
||||
return (seed * multiples)[:size]
|
||||
|
||||
|
||||
def load_case(
|
||||
params_idx: int,
|
||||
convergence_idx: int,
|
||||
data_idx: int,
|
||||
fmt_idx: int
|
||||
) -> Case:
|
||||
"""
|
||||
:return:
|
||||
"""
|
||||
params = ZFEC_PARAMS[params_idx]
|
||||
fmt = FORMATS[fmt_idx]
|
||||
convergence = CONVERGENCE_SECRETS[convergence_idx]
|
||||
data = stretch(*OBJECT_DESCRIPTIONS[data_idx])
|
||||
if params[1] == MAX_SHARES:
|
||||
params = (params[0], MAX_SHARES_MAP[fmt])
|
||||
k = key(params_idx, convergence_idx, data_idx, fmt_idx)
|
||||
return Case(k, fmt, params, convergence, data)
|
||||
|
||||
|
||||
@frozen
|
||||
class Case:
|
||||
"""
|
||||
Represent one case for which we want/have a test vector.
|
||||
"""
|
||||
key: str
|
||||
fmt: str
|
||||
params: tuple[int, int]
|
||||
convergence: bytes
|
||||
data: bytes
|
||||
yield case, cap
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,18 +1,144 @@
|
||||
"""
|
||||
A module that loads pre-generated test vectors.
|
||||
|
||||
:ivar CHK_PATH: The path of the file containing CHK test vectors.
|
||||
:ivar DATA_PATH: The path of the file containing test vectors.
|
||||
|
||||
:ivar chk: The CHK test vectors.
|
||||
:ivar capabilities: A mapping from each test case to its expected capability string.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TextIO
|
||||
from attrs import frozen
|
||||
from yaml import safe_load
|
||||
from pathlib import Path
|
||||
from base64 import b64encode, b64decode
|
||||
|
||||
DATA_PATH: Path = Path(__file__).parent / "test_vectors.yaml"
|
||||
|
||||
@frozen
class Sample:
    """
    Some instructions for building a long byte string.

    :ivar seed: Some bytes to repeat some times to produce the string.

    :ivar length: The length of the desired byte string.
    """
    seed: bytes
    length: int
|
||||
|
||||
@frozen
class Param:
    """
    Some ZFEC parameters.

    :ivar required: The number of required shares.

    :ivar total: The total number of shares.
    """
    required: int
    total: int
|
||||
|
||||
# CHK has a max of 256 shares.  SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"

# SDMF and MDMF encode share counts (N and k) into the share itself as an
# unsigned byte.  They could have encoded (share count - 1) to fit the full
# range supported by ZFEC into the unsigned byte - but they don't.  So 256 is
# inaccessible to those formats and we set the upper bound at 255.
MAX_SHARES_MAP = {
    "chk": 256,
    "sdmf": 255,
    "mdmf": 255,
}
|
||||
|
||||
@frozen
class SeedParam:
    """
    Some ZFEC parameters, almost.

    :ivar required: The number of required shares.

    :ivar total: Either the number of total shares or the constant
        ``MAX_SHARES`` to indicate that the total number of shares should be
        the maximum number supported by the object format.
    """
    required: int
    total: int | str

    def realize(self, max_total: int) -> Param:
        """
        Create a ``Param`` from this object's values, possibly
        substituting the given real value for total if necessary.

        :param max_total: The value to use to replace ``MAX_SHARES`` if
            necessary.

        :return: A ``Param`` with the same ``required`` value and with
            ``total`` replaced by ``max_total`` only when it was
            ``MAX_SHARES``.
        """
        if self.total == MAX_SHARES:
            return Param(self.required, max_total)
        return Param(self.required, self.total)
|
||||
|
||||
@frozen
class Case:
    """
    Represent one case for which we want/have a test vector.

    :ivar seed_params: The ZFEC parameters for the case.  The total may
        still be the symbolic ``MAX_SHARES``; use ``params`` to get the
        concrete values for this case's format.

    :ivar convergence: The convergence secret for the case.

    :ivar seed_data: The description of the data for the case; use ``data``
        to get the byte string itself.

    :ivar fmt: The name of the object format, used as a key into
        ``MAX_SHARES_MAP``.
    """
    # This holds a SeedParam, not a Param: ``params`` below relies on
    # ``realize``, which only SeedParam provides.
    seed_params: SeedParam
    convergence: bytes
    seed_data: Sample
    fmt: str

    @property
    def data(self) -> bytes:
        """
        The byte string described by ``seed_data``.
        """
        return stretch(self.seed_data.seed, self.seed_data.length)

    @property
    def params(self) -> Param:
        """
        The ZFEC parameters with any symbolic total replaced by the maximum
        for this case's format.
        """
        return self.seed_params.realize(MAX_SHARES_MAP[self.fmt])
|
||||
|
||||
|
||||
def encode_bytes(b: bytes) -> str:
    """
    Base64 encode some bytes to text so they are representable in the YAML
    test vector file.

    :param b: The bytes to encode.

    :return: The base64 encoding of ``b`` as ASCII text.
    """
    return b64encode(b).decode("ascii")
|
||||
|
||||
|
||||
def decode_bytes(b: str) -> bytes:
    """
    Base64 decode some text to bytes.

    :param b: The base64-encoded ASCII text to decode.

    :return: The decoded bytes.
    """
    return b64decode(b.encode("ascii"))
|
||||
|
||||
|
||||
def stretch(seed: bytes, size: int) -> bytes:
    """
    Given a simple description of a byte string, return the byte string
    itself.

    :param seed: The non-empty bytes to repeat.

    :param size: The positive length of the result.

    :return: ``seed`` repeated and then truncated to exactly ``size`` bytes.
    """
    assert isinstance(seed, bytes)
    assert isinstance(size, int)
    assert size > 0
    assert len(seed) > 0

    # One more repetition than the whole-number quotient always gives at
    # least ``size`` bytes; the slice trims the excess.
    multiples = size // len(seed) + 1
    return (seed * multiples)[:size]
|
||||
|
||||
|
||||
def load_capabilities(f: TextIO) -> dict[Case, str]:
    """
    Load the test vectors from a YAML document.

    The document must have a top-level ``"vector"`` list.  Each element is
    read for its ``"zfec"`` (``"required"`` and ``"total"``),
    ``"convergence"``, ``"sample"`` (``"seed"`` and ``"length"``),
    ``"format"``, and ``"expected"`` values.  The convergence secret and the
    sample seed are base64 decoded.

    :param f: A text stream containing the YAML document.

    :return: A mapping from each case in the document to its ``"expected"``
        value.
    """
    data = safe_load(f)
    return {
        Case(
            seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
            convergence=decode_bytes(case["convergence"]),
            seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
            fmt=case["format"],
        ): case["expected"]
        for case
        in data["vector"]
    }
|
||||
|
||||
|
||||
try:
|
||||
with DATA_PATH.open() as f:
|
||||
capabilities: dict[str, str] = safe_load(f)
|
||||
capabilities: dict[Case, str] = load_capabilities(f)
|
||||
except FileNotFoundError:
|
||||
capabilities = {}
|
||||
|
Loading…
Reference in New Issue
Block a user