2022-12-21 22:14:08 +00:00
|
|
|
"""
|
|
|
|
Verify certain results against test vectors with well-known results.
|
|
|
|
"""
|
|
|
|
|
2022-12-22 21:52:00 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2022-12-26 22:08:30 +00:00
|
|
|
from time import sleep
|
2022-12-26 17:06:34 +00:00
|
|
|
from typing import AsyncGenerator, Iterator
|
2022-12-21 22:14:08 +00:00
|
|
|
from hashlib import sha256
|
|
|
|
from itertools import product
|
2022-12-22 15:51:59 +00:00
|
|
|
from yaml import safe_dump
|
|
|
|
|
2022-12-26 22:08:30 +00:00
|
|
|
from attrs import frozen
|
|
|
|
|
2022-12-22 15:51:59 +00:00
|
|
|
from pytest import mark
|
|
|
|
from pytest_twisted import ensureDeferred
|
2022-12-21 22:14:08 +00:00
|
|
|
|
2022-12-22 15:51:59 +00:00
|
|
|
from . import vectors
|
2022-12-22 22:02:42 +00:00
|
|
|
from .util import reconfigure, upload, asyncfoldr, insert, TahoeProcess
|
|
|
|
|
|
|
|
def digest(bs: bytes) -> bytes:
    """
    Return the raw SHA-256 digest of ``bs``.
    """
    hasher = sha256()
    hasher.update(bs)
    return hasher.digest()
|
|
|
|
|
2022-12-21 22:14:08 +00:00
|
|
|
|
2022-12-22 22:02:42 +00:00
|
|
|
def hexdigest(bs: bytes) -> str:
    """
    Return the SHA-256 digest of ``bs`` as a lowercase hex string.
    """
    raw = sha256(bs).digest()
    return raw.hex()
|
|
|
|
|
2022-12-22 22:02:42 +00:00
|
|
|
|
2022-12-26 22:08:30 +00:00
|
|
|
# Sometimes uploads fail spuriously; retry this many times before giving up.
RETRIES = 3
|
|
|
|
|
|
|
|
|
2022-12-26 17:06:34 +00:00
|
|
|
# Just a couple convergence secrets.  The only thing we do with these values
# is feed them into a tagged hash.  They certainly make a difference to the
# output but the hash should destroy any structure in the input so it doesn't
# seem like there's a reason to test a lot of different values.
CONVERGENCE_SECRETS = [
    b"aaaaaaaaaaaaaaaa",
    digest(b"Hello world")[:16],
]
|
|
|
|
|
2022-12-26 17:06:34 +00:00
|
|
|
|
|
|
|
# Exercise at least a handful of different sizes, trying to cover:
#
# 1. Some cases smaller than one "segment" (128k).
#    This covers shrinking of some parameters to match data size.
#
# 2. Some cases right on the edges of integer segment multiples.
#    Because boundaries are tricky.
#
# 3. Some cases that involve quite a few segments.
#    This exercises merkle tree construction more thoroughly.
#
# See ``stretch`` for construction of the actual test data.

SEGMENT_SIZE = 128 * 1024
# Each entry is a (seed, size) pair; ``stretch`` expands the seed into a byte
# string of exactly ``size`` bytes.
OBJECT_DESCRIPTIONS = [
    (b"a", 1024),
    (b"c", 4096),
    (digest(b"foo"), SEGMENT_SIZE - 1),
    (digest(b"bar"), SEGMENT_SIZE + 1),
    (digest(b"baz"), SEGMENT_SIZE * 16 - 1),
    (digest(b"quux"), SEGMENT_SIZE * 16 + 1),
    (digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
    (digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
]
|
|
|
|
|
2022-12-26 22:08:30 +00:00
|
|
|
# CHK have a max of 256 shares.  SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"

# SDMF and MDMF encode share counts (N and k) into the share itself as an
# unsigned byte.  They could have encoded (share count - 1) to fit the full
# range supported by ZFEC into the unsigned byte - but they don't.  So 256 is
# inaccessible to those formats and we set the upper bound at 255.
MAX_SHARES_MAP = {
    "chk": 256,
    "sdmf": 255,
    "mdmf": 255,
}
|
|
|
|
|
2022-12-21 22:14:08 +00:00
|
|
|
# (k, N) erasure-encoding parameter pairs to exercise.  The symbolic
# MAX_SHARES entry is resolved per-format via MAX_SHARES_MAP (see
# ``load_case``).
ZFEC_PARAMS = [
    (1, 1),
    (1, 3),
    (2, 3),
    (3, 10),
    (71, 255),
    (101, MAX_SHARES),
]
|
|
|
|
|
|
|
|
# Object format names for which to generate and check test vectors (keys of
# MAX_SHARES_MAP).
FORMATS = [
    "chk",
    "sdmf",
    "mdmf",
]
|
|
|
|
|
2022-12-22 21:52:00 +00:00
|
|
|
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
def test_convergence(convergence_idx):
    """
    Convergence secrets are 16 bytes.
    """
    convergence = CONVERGENCE_SECRETS[convergence_idx]
    assert isinstance(convergence, bytes), "Convergence secret must be bytes"
    # Fixed typo in the failure message ("must by" -> "must be").
    assert len(convergence) == 16, "Convergence secret must be 16 bytes"
|
|
|
|
|
|
|
|
|
2022-12-22 21:52:00 +00:00
|
|
|
@mark.parametrize('params_idx', range(len(ZFEC_PARAMS)))
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
@mark.parametrize('data_idx', range(len(OBJECT_DESCRIPTIONS)))
@mark.parametrize('fmt_idx', range(len(FORMATS)))
@ensureDeferred
async def test_capability(reactor, request, alice, params_idx, convergence_idx, data_idx, fmt_idx):
    """
    The capability that results from uploading certain well-known data
    with certain well-known parameters results in exactly the previously
    computed value.
    """
    case = load_case(params_idx, convergence_idx, data_idx, fmt_idx)

    # Bring alice's encoding parameters and convergence secret in line with
    # this case (happiness fixed at 1).
    await reconfigure(reactor, request, alice, (1,) + case.params, case.convergence)

    # Upload the data in the format this case calls for.
    observed = upload(alice, case.fmt, case.data)

    # The resulting capability must match the previously recorded value.
    known = vectors.capabilities[case.key]
    assert observed == known
|
|
|
|
|
2022-12-22 15:51:59 +00:00
|
|
|
|
|
|
|
@ensureDeferred
async def test_generate(reactor, request, alice):
    """
    This is a helper for generating the test vectors.

    You can re-generate the test vectors by fixing the name of the test and
    running it.  Normally this test doesn't run because it ran once and we
    captured its output.  Other tests run against that output and we want them
    to run against the results produced originally, not a possibly
    ever-changing set of outputs.
    """
    # Enumerate every coordinate in the test vector space.
    coordinates = product(
        range(len(ZFEC_PARAMS)),
        range(len(CONVERGENCE_SECRETS)),
        range(len(OBJECT_DESCRIPTIONS)),
        range(len(FORMATS)),
    )
    # Fold the generated (key, capability) pairs into a dict.
    results = await asyncfoldr(
        generate(reactor, request, alice, coordinates),
        insert,
        {},
    )
    # Record the inputs alongside the outputs so the vectors are
    # self-describing.
    inputs = {
        "zfec": ZFEC_PARAMS,
        "convergence": CONVERGENCE_SECRETS,
        "objects": OBJECT_DESCRIPTIONS,
        "formats": FORMATS,
    }
    with vectors.CHK_PATH.open("w") as f:
        f.write(safe_dump({
            "version": "2022-12-26",
            "params": inputs,
            "vector": results,
        }))
|
|
|
|
|
|
|
|
|
|
|
|
async def generate(
    reactor,
    request,
    alice: TahoeProcess,
    # Fixed annotation: ``Iterator`` takes a single type argument; each
    # element of the iterator is a 4-tuple of indexes.
    space: Iterator[tuple[int, int, int, int]],
) -> AsyncGenerator[tuple[str, str], None]:
    """
    Generate all of the test vectors using the given node.

    :param reactor: The reactor to use to restart the Tahoe-LAFS node when it
        needs to be reconfigured.

    :param request: The pytest request object to use to arrange process
        cleanup.

    :param alice: The Tahoe-LAFS node to use to generate the test vectors.

    :param space: An iterator of coordinates in the test vector space for
        which to generate values.  The elements of each tuple give indexes into
        ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, and FORMATS.

    :return: The yield values are two-tuples describing a test vector.  The
        first element is a string describing a case and the second element is
        the CHK capability for that case.
    """
    # Share placement doesn't affect the resulting capability.  For maximum
    # reliability, be happy if we can put shares anywhere
    happy = 1
    # Track the (params, convergence) configuration the node currently has so
    # the node is only restarted when the configuration actually changes.
    node_key = (None, None)
    for params_idx, secret_idx, data_idx, fmt_idx in space:
        case = load_case(params_idx, secret_idx, data_idx, fmt_idx)
        if node_key != (case.params, case.convergence):
            await reconfigure(reactor, request, alice, (happy,) + case.params, case.convergence)
            node_key = (case.params, case.convergence)

        cap = upload(alice, case.fmt, case.data)
        yield case.key, cap
|
2022-12-22 15:51:59 +00:00
|
|
|
|
|
|
|
|
2022-12-26 22:08:30 +00:00
|
|
|
def key(params: int, secret: int, data: int, fmt: int) -> str:
    """
    Construct the key describing the case defined by the given parameters.

    The parameters are indexes into the test data for a certain case.

    :return: A distinct string for the given inputs.
    """
    coordinates = (params, secret, data, fmt)
    return "-".join(map(str, coordinates))
|
2022-12-26 17:06:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
def stretch(seed: bytes, size: int) -> bytes:
    """
    Given a simple description of a byte string, return the byte string
    itself: the seed repeated enough times to cover ``size`` bytes, then
    truncated to exactly ``size``.
    """
    assert isinstance(seed, bytes)
    assert isinstance(size, int)
    assert size > 0
    assert len(seed) > 0

    # Ceiling division: the smallest repeat count whose total length is at
    # least ``size``.
    repetitions = -(-size // len(seed))
    return (seed * repetitions)[:size]
|
|
|
|
|
|
|
|
|
2022-12-26 22:08:30 +00:00
|
|
|
def load_case(
    params_idx: int,
    convergence_idx: int,
    data_idx: int,
    fmt_idx: int
) -> Case:
    """
    Assemble the ``Case`` at the given coordinates in the test vector space.

    :return: A ``Case`` with the symbolic MAX_SHARES share count resolved to
        the concrete per-format maximum.
    """
    fmt = FORMATS[fmt_idx]
    convergence = CONVERGENCE_SECRETS[convergence_idx]
    seed, length = OBJECT_DESCRIPTIONS[data_idx]
    data = stretch(seed, length)
    needed, total = ZFEC_PARAMS[params_idx]
    if total == MAX_SHARES:
        total = MAX_SHARES_MAP[fmt]
    return Case(
        key(params_idx, convergence_idx, data_idx, fmt_idx),
        fmt,
        (needed, total),
        convergence,
        data,
    )
|
|
|
|
|
|
|
|
|
|
|
|
@frozen
class Case:
    """
    Represent one case for which we want/have a test vector.
    """
    # Unique identifier for this case; see ``key`` for its construction.
    key: str
    # Object format name ("chk", "sdmf", or "mdmf").
    fmt: str
    # ZFEC (k, N) encoding parameters, with MAX_SHARES already resolved to a
    # concrete integer.
    params: tuple[int, int]
    # The 16-byte convergence secret for this case.
    convergence: bytes
    # The plaintext object to upload.
    data: bytes
|