2022-12-21 22:14:08 +00:00
|
|
|
"""
|
|
|
|
Verify certain results against test vectors with well-known results.
|
|
|
|
"""
|
|
|
|
|
2022-12-22 21:52:00 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2022-12-26 17:06:34 +00:00
|
|
|
from typing import AsyncGenerator, Iterator
|
2022-12-21 22:14:08 +00:00
|
|
|
from hashlib import sha256
|
|
|
|
from itertools import product
|
2022-12-22 15:51:59 +00:00
|
|
|
from yaml import safe_dump
|
|
|
|
|
|
|
|
from pytest import mark
|
|
|
|
from pytest_twisted import ensureDeferred
|
2022-12-21 22:14:08 +00:00
|
|
|
|
2022-12-22 15:51:59 +00:00
|
|
|
from . import vectors
|
2022-12-22 22:02:42 +00:00
|
|
|
from .util import reconfigure, upload, asyncfoldr, insert, TahoeProcess
|
|
|
|
|
|
|
|
def digest(bs: bytes) -> bytes:
    """
    Return the raw SHA-256 digest of the given bytes.
    """
    hasher = sha256(bs)
    return hasher.digest()
|
|
|
|
|
2022-12-21 22:14:08 +00:00
|
|
|
|
2022-12-22 22:02:42 +00:00
|
|
|
def hexdigest(bs: bytes) -> str:
    """
    Return the SHA-256 digest of the given bytes as a hex string.
    """
    hasher = sha256(bs)
    return hasher.hexdigest()
|
|
|
|
|
2022-12-22 22:02:42 +00:00
|
|
|
|
2022-12-26 17:06:34 +00:00
|
|
|
# Just a couple convergence secrets. The only thing we do with this value is
|
|
|
|
# feed it into a tagged hash. It certainly makes a difference to the output
|
|
|
|
# but the hash should destroy any structure in the input so it doesn't seem
|
|
|
|
# like there's a reason to test a lot of different values.
|
2022-12-21 22:14:08 +00:00
|
|
|
CONVERGENCE_SECRETS = [
    # A trivial, easily-recognized 16-byte value.
    b"aaaaaaaaaaaaaaaa",
    # A "random-looking" value; truncated to the required 16 bytes.
    digest(b"Hello world")[:16],
]
|
|
|
|
|
2022-12-26 17:06:34 +00:00
|
|
|
|
|
|
|
# Exercise at least a handful of different sizes, trying to cover:
|
|
|
|
#
|
|
|
|
# 1. Some cases smaller than one "segment" (128k).
|
|
|
|
# This covers shrinking of some parameters to match data size.
|
|
|
|
#
|
|
|
|
# 2. Some cases right on the edges of integer segment multiples.
|
|
|
|
# Because boundaries are tricky.
|
|
|
|
#
|
|
|
|
# 3. Some cases that involve quite a few segments.
|
|
|
|
# This exercises merkle tree construction more thoroughly.
|
|
|
|
#
|
|
|
|
# See ``stretch`` for construction of the actual test data.
|
|
|
|
|
|
|
|
# The Tahoe-LAFS default maximum segment size for immutable uploads.
SEGMENT_SIZE = 128 * 1024

# Each entry is a (seed, size) pair fed to ``stretch`` to produce the actual
# upload plaintext of exactly ``size`` bytes.
OBJECT_DESCRIPTIONS = [
    # Sub-segment sizes: parameter shrinking cases.
    (b"a", 1024),
    (b"c", 4096),
    # One byte either side of a single segment boundary.
    (digest(b"foo"), SEGMENT_SIZE - 1),
    (digest(b"bar"), SEGMENT_SIZE + 1),
    # One byte either side of 16 segments.
    (digest(b"baz"), SEGMENT_SIZE * 16 - 1),
    (digest(b"quux"), SEGMENT_SIZE * 16 + 1),
    # One byte either side of 64 segments: exercises deeper merkle trees.
    (digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
    (digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
]
|
|
|
|
|
|
|
|
# (k, n) zfec erasure-coding parameters: k required shares out of n total.
# Ranges from the degenerate 1-of-1 up to large share counts near the
# encoder's limits.
ZFEC_PARAMS = [
    (1, 1),
    (1, 3),
    (2, 3),
    (3, 10),
    (71, 255),
    (101, 256),
]
|
|
|
|
|
2022-12-22 21:52:00 +00:00
|
|
|
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
def test_convergence(convergence_idx):
    """
    Convergence secrets are 16 bytes.
    """
    convergence = CONVERGENCE_SECRETS[convergence_idx]
    assert isinstance(convergence, bytes), "Convergence secret must be bytes"
    # Fixed typo in the failure message: "must by" -> "must be".
    assert len(convergence) == 16, "Convergence secret must be 16 bytes"
|
|
|
|
|
|
|
|
|
2022-12-22 21:52:00 +00:00
|
|
|
@mark.parametrize('params_idx', range(len(ZFEC_PARAMS)))
@mark.parametrize('convergence_idx', range(len(CONVERGENCE_SECRETS)))
@mark.parametrize('data_idx', range(len(OBJECT_DESCRIPTIONS)))
@ensureDeferred
async def test_chk_capability(reactor, request, alice, params_idx, convergence_idx, data_idx):
    """
    The CHK capability that results from uploading certain well-known data
    with certain well-known parameters results in exactly the previously
    computed value.

    :param reactor: Twisted reactor, used to restart the node when its
        configuration changes.
    :param request: pytest request fixture, used for cleanup registration.
    :param alice: The running Tahoe-LAFS node used to perform the upload.
    :param params_idx: Index into ``ZFEC_PARAMS``.
    :param convergence_idx: Index into ``CONVERGENCE_SECRETS``.
    :param data_idx: Index into ``OBJECT_DESCRIPTIONS``.
    """
    # Resolve the parametrized indexes into the vector lookup key, the zfec
    # (k, n) parameters, the convergence secret, and the plaintext bytes.
    key, params, convergence, data = load_case(
        params_idx,
        convergence_idx,
        data_idx,
    )

    # rewrite alice's config to match params and convergence
    # ((1,) + params supplies happy=1 ahead of the (k, n) pair)
    await reconfigure(reactor, request, alice, (1,) + params, convergence)

    # upload data as a CHK
    actual = upload(alice, "chk", data)

    # compare the resulting cap to the expected result
    expected = vectors.chk[key]
    assert actual == expected
|
|
|
|
|
2022-12-22 15:51:59 +00:00
|
|
|
|
|
|
|
@ensureDeferred
async def test_generate(reactor, request, alice):
    """
    This is a helper for generating the test vectors.

    You can re-generate the test vectors by fixing the name of the test and
    running it.  Normally this test doesn't run because it ran once and we
    captured its output.  Other tests run against that output and we want them
    to run against the results produced originally, not a possibly
    ever-changing set of outputs.

    :param reactor: Twisted reactor, used when the node must be restarted.
    :param request: pytest request fixture, used for cleanup registration.
    :param alice: The running Tahoe-LAFS node used to produce the vectors.
    """
    # The full cartesian product of all case indexes: every combination of
    # encoding parameters, convergence secret, and object description.
    space = product(
        range(len(ZFEC_PARAMS)),
        range(len(CONVERGENCE_SECRETS)),
        range(len(OBJECT_DESCRIPTIONS)),
    )
    # Fold the async stream of (key, capability) pairs into a single dict.
    results = await asyncfoldr(
        generate(reactor, request, alice, space),
        insert,
        {},
    )
    # Persist the vectors, together with the inputs that produced them, so
    # later runs can verify against exactly these recorded results.
    with vectors.CHK_PATH.open("w") as f:
        f.write(safe_dump({
            "version": "2022-12-26",
            "params": {
                "zfec": ZFEC_PARAMS,
                "convergence": CONVERGENCE_SECRETS,
                "objects": OBJECT_DESCRIPTIONS,
            },
            "vector": results,
        }))
|
|
|
|
|
|
|
|
|
|
|
|
async def generate(
    reactor,
    request,
    alice: TahoeProcess,
    # Fixed annotation: Iterator takes a single type parameter, so a stream
    # of index triples is Iterator[tuple[int, int, int]], not
    # Iterator[int, int, int] (which is invalid for this generic).
    space: Iterator[tuple[int, int, int]],
) -> AsyncGenerator[tuple[str, str], None]:
    """
    Generate all of the test vectors using the given node.

    :param reactor: The reactor to use to restart the Tahoe-LAFS node when it
        needs to be reconfigured.

    :param request: The pytest request object to use to arrange process
        cleanup.

    :param alice: The Tahoe-LAFS node to use to generate the test vectors.

    :param space: An iterator of (params_idx, convergence_idx, data_idx)
        triples identifying the cases to generate.

    :return: The yield values are two-tuples describing a test vector.  The
        first element is a string describing a case and the second element is
        the CHK capability for that case.
    """
    # Share placement doesn't affect the resulting capability.  For maximum
    # reliability, be happy if we can put shares anywhere
    happy = 1
    # Track the (params, secret) pair the node is currently configured with
    # so we only pay for a reconfigure/restart when the pair changes.
    node_key = (None, None)
    for params_idx, secret_idx, data_idx in space:
        key, params, secret, data = load_case(params_idx, secret_idx, data_idx)
        if node_key != (params, secret):
            await reconfigure(reactor, request, alice, (happy,) + params, secret)
            node_key = (params, secret)

        yield key, upload(alice, "chk", data)
|
2022-12-22 15:51:59 +00:00
|
|
|
|
|
|
|
|
2022-12-26 17:06:34 +00:00
|
|
|
def key(params: int, secret: int, data: int) -> str:
    """
    Build the string key identifying one test case.

    Each argument is an index into the corresponding table of test inputs
    (encoding parameters, convergence secrets, object descriptions).

    :return: A string distinct for every distinct combination of inputs.
    """
    return "-".join(map(str, (params, secret, data)))
|
|
|
|
|
|
|
|
|
|
|
|
def stretch(seed: bytes, size: int) -> bytes:
    """
    Given a simple description of a byte string, return the byte string
    itself: the seed repeated as many times as necessary and truncated to
    exactly ``size`` bytes.
    """
    assert isinstance(seed, bytes)
    assert isinstance(size, int)
    assert size > 0
    assert len(seed) > 0

    # Ceiling division: the number of whole repetitions needed to reach
    # at least ``size`` bytes before truncation.
    repetitions = -(-size // len(seed))
    return (seed * repetitions)[:size]
|
|
|
|
|
|
|
|
|
|
|
|
# Fixed return annotation: the first element is the index triple used as the
# vector lookup key (a tuple of ints), not a str as previously annotated.
def load_case(params_idx: int, convergence_idx: int, data_idx: int) -> tuple[tuple[int, int, int], tuple[int, int], bytes, bytes]:
    """
    Resolve a test case from its indexes into the input tables.

    :param params_idx: Index into ``ZFEC_PARAMS``.
    :param convergence_idx: Index into ``CONVERGENCE_SECRETS``.
    :param data_idx: Index into ``OBJECT_DESCRIPTIONS``.

    :return: A four-tuple of the case's lookup key (the index triple), the
        zfec (k, n) parameters, the convergence secret, and the plaintext
        bytes for the case.
    """
    params = ZFEC_PARAMS[params_idx]
    convergence = CONVERGENCE_SECRETS[convergence_idx]
    data = stretch(*OBJECT_DESCRIPTIONS[data_idx])
    key = (params_idx, convergence_idx, data_idx)
    return (key, params, convergence, data)
|