Mirror of https://github.com/tahoe-lafs/tahoe-lafs.git, synced 2024-12-21 22:07:51 +00:00

Commit fb70ba1867 (parent ca00adf2b4): Generate and consume the new structure properly
@@ -4,28 +4,30 @@ Verify certain results against test vectors with well-known results.

 from __future__ import annotations
-
-from time import sleep
 from typing import AsyncGenerator, Iterator
 from hashlib import sha256
-from itertools import product
+from itertools import starmap, product
 from yaml import safe_dump

-from attrs import frozen
-
 from pytest import mark
 from pytest_twisted import ensureDeferred

 from . import vectors
-from .util import reconfigure, upload, asyncfoldr, insert, TahoeProcess
+from .util import reconfigure, upload, TahoeProcess

 def digest(bs: bytes) -> bytes:
+    """
+    Digest bytes to bytes.
+    """
     return sha256(bs).digest()


 def hexdigest(bs: bytes) -> str:
+    """
+    Digest bytes to text.
+    """
     return sha256(bs).hexdigest()


 # Just a couple convergence secrets. The only thing we do with this value is
 # feed it into a tagged hash. It certainly makes a difference to the output
 # but the hash should destroy any structure in the input so it doesn't seem
@@ -35,7 +37,6 @@ CONVERGENCE_SECRETS = [
     digest(b"Hello world")[:16],
 ]

-
 # Exercise at least a handful of different sizes, trying to cover:
 #
 # 1. Some cases smaller than one "segment" (128k).
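An aside on the "tagged hash" mentioned in the comment above: the tag domain-separates the hash input, and since only the hash output is ever used, the secrets just need to be distinct, well-known 16-byte values. A minimal Python sketch of the idea (an illustration only, not Tahoe-LAFS's actual hashutil implementation; the netstring-style tagging is an assumption):

    from hashlib import sha256

    def tagged_hash_sketch(tag: bytes, data: bytes) -> bytes:
        # Length-prefix the tag (netstring style) so one (tag, data) pair
        # can never collide with a different pair that happens to
        # concatenate to the same byte string.
        return sha256(b"%d:%s," % (len(tag), tag) + data).digest()

    # Flipping one byte of the 16-byte secret completely changes the output:
    assert tagged_hash_sketch(b"convergence", b"\x42" * 16) != \
        tagged_hash_sketch(b"convergence", b"\x43" + b"\x42" * 15)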
@@ -51,87 +52,66 @@ CONVERGENCE_SECRETS = [

 SEGMENT_SIZE = 128 * 1024
 OBJECT_DESCRIPTIONS = [
-    (b"a", 1024),
-    (b"c", 4096),
-    (digest(b"foo"), SEGMENT_SIZE - 1),
-    (digest(b"bar"), SEGMENT_SIZE + 1),
-    (digest(b"baz"), SEGMENT_SIZE * 16 - 1),
-    (digest(b"quux"), SEGMENT_SIZE * 16 + 1),
-    (digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
-    (digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
+    vectors.Sample(b"a", 1024),
+    vectors.Sample(b"c", 4096),
+    vectors.Sample(digest(b"foo"), SEGMENT_SIZE - 1),
+    vectors.Sample(digest(b"bar"), SEGMENT_SIZE + 1),
+    vectors.Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
+    vectors.Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
+    vectors.Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
+    vectors.Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
 ]

-# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
-# Represent max symbolically and resolve it when we know what format we're
-# dealing with.
-MAX_SHARES = "max"
-
-# SDMF and MDMF encode share counts (N and k) into the share itself as an
-# unsigned byte. They could have encoded (share count - 1) to fit the full
-# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
-# inaccessible to those formats and we set the upper bound at 255.
-MAX_SHARES_MAP = {
-    "chk": 256,
-    "sdmf": 255,
-    "mdmf": 255,
-}
-
 ZFEC_PARAMS = [
-    (1, 1),
-    (1, 3),
-    (2, 3),
-    (3, 10),
-    (71, 255),
-    (101, MAX_SHARES),
+    vectors.SeedParam(1, 1),
+    vectors.SeedParam(1, 3),
+    vectors.SeedParam(2, 3),
+    vectors.SeedParam(3, 10),
+    vectors.SeedParam(71, 255),
+    vectors.SeedParam(101, vectors.MAX_SHARES),
 ]

 FORMATS = [
     "chk",
-    "sdmf",
-    "mdmf",
+    # "sdmf",
+    # "mdmf",
 ]

-@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
-def test_convergence(convergence_idx):
+@mark.parametrize('convergence', CONVERGENCE_SECRETS)
+def test_convergence(convergence):
     """
     Convergence secrets are 16 bytes.
     """
-    convergence = CONVERGENCE_SECRETS[convergence_idx]
     assert isinstance(convergence, bytes), "Convergence secret must be bytes"
     assert len(convergence) == 16, "Convergence secret must by 16 bytes"


-@mark.parametrize('params_idx', range(len(ZFEC_PARAMS)))
-@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
-@mark.parametrize('data_idx', range(len(OBJECT_DESCRIPTIONS)))
-@mark.parametrize('fmt_idx', range(len(FORMATS)))
+@mark.parametrize('seed_params', ZFEC_PARAMS)
+@mark.parametrize('convergence', CONVERGENCE_SECRETS)
+@mark.parametrize('seed_data', OBJECT_DESCRIPTIONS)
+@mark.parametrize('fmt', FORMATS)
 @ensureDeferred
-async def test_capability(reactor, request, alice, params_idx, convergence_idx, data_idx, fmt_idx):
+async def test_capability(reactor, request, alice, seed_params, convergence, seed_data, fmt):
     """
     The capability that results from uploading certain well-known data
     with certain well-known parameters results in exactly the previously
     computed value.
     """
-    case = load_case(
-        params_idx,
-        convergence_idx,
-        data_idx,
-        fmt_idx,
-    )
+    case = vectors.Case(seed_params, convergence, seed_data, fmt)

     # rewrite alice's config to match params and convergence
-    await reconfigure(reactor, request, alice, (1,) + case.params, case.convergence)
+    await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)

     # upload data in the correct format
     actual = upload(alice, case.fmt, case.data)

     # compare the resulting cap to the expected result
-    expected = vectors.capabilities["vector"][case.key]
+    expected = vectors.capabilities[case]
     assert actual == expected


 @ensureDeferred
-async def skiptest_generate(reactor, request, alice):
+async def test_generate(reactor, request, alice):
     """
     This is a helper for generating the test vectors.

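The pattern change in the hunk above is worth spelling out: instead of parametrizing over range(len(...)) indexes and dereferencing module-level lists inside the test, the tests now parametrize over the structured values themselves, so each test receives its real inputs directly. A standalone sketch of the two styles (illustrative names, not from the commit):

    from attrs import frozen
    from pytest import mark

    @frozen
    class Sample:
        seed: bytes
        length: int

    SAMPLES = [Sample(b"a", 1024), Sample(b"c", 4096)]

    # Old style: the test body must look the value up by index.
    @mark.parametrize("sample_idx", range(len(SAMPLES)))
    def test_by_index(sample_idx):
        assert SAMPLES[sample_idx].length > 0

    # New style: pytest passes the structured value straight in.
    @mark.parametrize("sample", SAMPLES)
    def test_by_value(sample):
        assert sample.length > 0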
@@ -141,27 +121,34 @@ async def skiptest_generate(reactor, request, alice):
     to run against the results produced originally, not a possibly
     ever-changing set of outputs.
     """
-    space = product(
-        range(len(ZFEC_PARAMS)),
-        range(len(CONVERGENCE_SECRETS)),
-        range(len(OBJECT_DESCRIPTIONS)),
-        range(len(FORMATS)),
-    )
-    results = await asyncfoldr(
-        generate(reactor, request, alice, space),
-        insert,
-        {},
-    )
+    space = starmap(vectors.Case, product(
+        ZFEC_PARAMS,
+        CONVERGENCE_SECRETS,
+        OBJECT_DESCRIPTIONS,
+        FORMATS,
+    ))
+    results = generate(reactor, request, alice, space)
     with vectors.DATA_PATH.open("w") as f:
         f.write(safe_dump({
-            "version": "2022-12-26",
-            "params": {
-                "zfec": ZFEC_PARAMS,
-                "convergence": CONVERGENCE_SECRETS,
-                "objects": OBJECT_DESCRIPTIONS,
-                "formats": FORMATS,
-            },
-            "vector": results,
+            "version": "2023-01-03",
+            "vector": [
+                {
+                    "convergence": vectors.encode_bytes(case.convergence),
+                    "format": case.fmt,
+                    "sample": {
+                        "seed": vectors.encode_bytes(case.seed_data.seed),
+                        "length": case.seed_data.length,
+                    },
+                    "zfec": {
+                        "segmentSize": SEGMENT_SIZE,
+                        "required": case.seed_params.required,
+                        "total": case.seed_params.total,
+                    },
+                    "expected": cap,
+                }
+                async for (case, cap)
+                in results
+            ],
         }))


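For orientation, the safe_dump call above replaces the old parallel, index-keyed tables with a list of self-describing entries. One element of the emitted "vector" list would look roughly like this (a sketch with invented values; only the key names and nesting come from the code above):

    entry = {
        "convergence": "ZHVtbXlkdW1teWR1bW15IQ==",  # base64 of the 16-byte secret
        "format": "chk",
        "sample": {
            "seed": "YQ==",   # base64 of Sample.seed (here b"a")
            "length": 1024,   # Sample.length, the stretched plaintext size
        },
        "zfec": {
            "segmentSize": 131072,  # SEGMENT_SIZE = 128 * 1024
            "required": 1,
            "total": 1,
        },
        "expected": "URI:CHK:...",  # the capability the upload produced
    }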
@@ -169,8 +156,8 @@ async def generate(
     reactor,
     request,
     alice: TahoeProcess,
-    space: Iterator[int, int, int, int],
-) -> AsyncGenerator[tuple[str, str], None]:
+    cases: Iterator[vectors.Case],
+) -> AsyncGenerator[[vectors.Case, str], None]:
     """
     Generate all of the test vectors using the given node.

@@ -184,79 +171,21 @@ async def generate(

     :param alice: The Tahoe-LAFS node to use to generate the test vectors.

-    :param space: An iterator of coordinates in the test vector space for
-        which to generate values. The elements of each tuple give indexes into
-        ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, and FORMATS.
+    :param case: The inputs for which to generate a value.

-    :return: The yield values are two-tuples describing a test vector. The
-        first element is a string describing a case and the second element is
-        the capability for that case.
+    :return: The capability for the case.
     """
     # Share placement doesn't affect the resulting capability. For maximum
     # reliability of this generator, be happy if we can put shares anywhere
     happy = 1
-    node_key = (None, None)
-    for params_idx, secret_idx, data_idx, fmt_idx in space:
-        case = load_case(params_idx, secret_idx, data_idx, fmt_idx)
-        if node_key != (case.params, case.convergence):
-            await reconfigure(reactor, request, alice, (happy,) + case.params, case.convergence)
-            node_key = (case.params, case.convergence)
+    for case in cases:
+        await reconfigure(
+            reactor,
+            request,
+            alice,
+            (happy, case.params.required, case.params.total),
+            case.convergence
+        )

         cap = upload(alice, case.fmt, case.data)
-        yield case.key, cap
-
-
-def key(params: int, secret: int, data: int, fmt: int) -> str:
-    """
-    Construct the key describing the case defined by the given parameters.
-
-    The parameters are indexes into the test data for a certain case.
-
-    :return: A distinct string for the given inputs.
-    """
-    return f"{params}-{secret}-{data}-{fmt}"
-
-
-def stretch(seed: bytes, size: int) -> bytes:
-    """
-    Given a simple description of a byte string, return the byte string
-    itself.
-    """
-    assert isinstance(seed, bytes)
-    assert isinstance(size, int)
-    assert size > 0
-    assert len(seed) > 0
-
-    multiples = size // len(seed) + 1
-    return (seed * multiples)[:size]
-
-
-def load_case(
-        params_idx: int,
-        convergence_idx: int,
-        data_idx: int,
-        fmt_idx: int
-) -> Case:
-    """
-    :return:
-    """
-    params = ZFEC_PARAMS[params_idx]
-    fmt = FORMATS[fmt_idx]
-    convergence = CONVERGENCE_SECRETS[convergence_idx]
-    data = stretch(*OBJECT_DESCRIPTIONS[data_idx])
-    if params[1] == MAX_SHARES:
-        params = (params[0], MAX_SHARES_MAP[fmt])
-    k = key(params_idx, convergence_idx, data_idx, fmt_idx)
-    return Case(k, fmt, params, convergence, data)
-
-
-@frozen
-class Case:
-    """
-    Represent one case for which we want/have a test vector.
-    """
-    key: str
-    fmt: str
-    params: tuple[int, int]
-    convergence: bytes
-    data: bytes
+        yield case, cap
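The block of removals above is the heart of the commit: the string key(...) scheme disappears because the new vectors.Case (defined in the next file) is a frozen attrs class. attrs generates __eq__ and __hash__ for frozen classes, so a Case can serve directly as the key into the capabilities mapping. A standalone demonstration of that property (an illustrative class, not the commit's own):

    from attrs import frozen

    @frozen
    class K:
        fmt: str
        length: int

    # Equal field values mean equal, interchangeable dict keys:
    table = {K("chk", 1024): "URI:CHK:..."}
    assert table[K("chk", 1024)] == "URI:CHK:..."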
(File diff suppressed because it is too large; this is presumably the regenerated test_vectors.yaml data file.)
@@ -1,18 +1,144 @@
 """
 A module that loads pre-generated test vectors.

-:ivar CHK_PATH: The path of the file containing CHK test vectors.
+:ivar DATA_PATH: The path of the file containing test vectors.

-:ivar chk: The CHK test vectors.
+:ivar capabilities: The CHK test vectors.
 """

+from __future__ import annotations
+
+from typing import TextIO
+from attrs import frozen
 from yaml import safe_load
 from pathlib import Path
+from base64 import b64encode, b64decode

 DATA_PATH: Path = Path(__file__).parent / "test_vectors.yaml"

+@frozen
+class Sample:
+    """
+    Some instructions for building a long byte string.
+
+    :ivar seed: Some bytes to repeat some times to produce the string.
+    :ivar length: The length of the desired byte string.
+    """
+    seed: bytes
+    length: int
+
+@frozen
+class Param:
+    """
+    Some ZFEC parameters.
+    """
+    required: int
+    total: int
+
+# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
+# Represent max symbolically and resolve it when we know what format we're
+# dealing with.
+MAX_SHARES = "max"
+
+# SDMF and MDMF encode share counts (N and k) into the share itself as an
+# unsigned byte. They could have encoded (share count - 1) to fit the full
+# range supported by ZFEC into the unsigned byte - but they don't. So 256 is
+# inaccessible to those formats and we set the upper bound at 255.
+MAX_SHARES_MAP = {
+    "chk": 256,
+    "sdmf": 255,
+    "mdmf": 255,
+}
+
+@frozen
+class SeedParam:
+    """
+    Some ZFEC parameters, almost.
+
+    :ivar required: The number of required shares.
+
+    :ivar total: Either the number of total shares or the constant
+        ``MAX_SHARES`` to indicate that the total number of shares should be
+        the maximum number supported by the object format.
+    """
+    required: int
+    total: int | str
+
+    def realize(self, max_total: int) -> Param:
+        """
+        Create a ``Param`` from this object's values, possibly
+        substituting the given real value for total if necessary.
+
+        :param max_total: The value to use to replace ``MAX_SHARES`` if
+            necessary.
+        """
+        if self.total == MAX_SHARES:
+            return Param(self.required, max_total)
+        return Param(self.required, self.total)
+
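As an aside, here is how realize resolves the symbolic maximum for the seed parameters used in test_vectors.py; a small usage sketch (not part of the commit):

    p = SeedParam(101, MAX_SHARES)
    assert p.realize(MAX_SHARES_MAP["chk"]) == Param(101, 256)
    assert p.realize(MAX_SHARES_MAP["mdmf"]) == Param(101, 255)
    # A concrete total passes through unchanged:
    assert SeedParam(3, 10).realize(256) == Param(3, 10)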
+@frozen
+class Case:
+    """
+    Represent one case for which we want/have a test vector.
+    """
+    seed_params: Param
+    convergence: bytes
+    seed_data: Sample
+    fmt: str
+
+    @property
+    def data(self):
+        return stretch(self.seed_data.seed, self.seed_data.length)
+
+    @property
+    def params(self):
+        return self.seed_params.realize(MAX_SHARES_MAP[self.fmt])
+
+
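Taken together, a Case stores only the small seed description and derives the real values on demand; for example (a sketch, not part of the commit; note that stretch is defined a little further down in this file):

    case = Case(
        seed_params=SeedParam(1, 1),
        convergence=b"\x00" * 16,
        seed_data=Sample(b"ab", 5),
        fmt="chk",
    )
    assert case.data == b"ababa"        # the seed repeated out to length 5
    assert case.params == Param(1, 1)   # realized against MAX_SHARES_MAP["chk"]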
+def encode_bytes(b: bytes) -> str:
+    """
+    Base64 encode some bytes to text so they are representable in JSON.
+    """
+    return b64encode(b).decode("ascii")
+
+
+def decode_bytes(b: str) -> bytes:
+    """
+    Base64 decode some text to bytes.
+    """
+    return b64decode(b.encode("ascii"))
+
+
+def stretch(seed: bytes, size: int) -> bytes:
+    """
+    Given a simple description of a byte string, return the byte string
+    itself.
+    """
+    assert isinstance(seed, bytes)
+    assert isinstance(size, int)
+    assert size > 0
+    assert len(seed) > 0
+
+    multiples = size // len(seed) + 1
+    return (seed * multiples)[:size]
+
+
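A quick check of these helpers (illustrative only): stretch repeats the seed and truncates to the requested size, and encode_bytes/decode_bytes round-trip arbitrary bytes through base64 text:

    assert stretch(b"abc", 8) == b"abcabcab"
    secret = b"\xff" * 16
    assert decode_bytes(encode_bytes(secret)) == secret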
+def load_capabilities(f: TextIO) -> dict[Case, str]:
+    data = safe_load(f)
+    return {
+        Case(
+            seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
+            convergence=decode_bytes(case["convergence"]),
+            seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
+            fmt=case["format"],
+        ): case["expected"]
+        for case
+        in data["vector"]
+    }
+
+
 try:
     with DATA_PATH.open() as f:
-        capabilities: dict[str, str] = safe_load(f)
+        capabilities: dict[Case, str] = load_capabilities(f)
 except FileNotFoundError:
     capabilities = {}