tahoe-lafs/integration/test_vectors.py

"""
Verify certain results against test vectors with well-known results.
"""

from __future__ import annotations

from typing import AsyncGenerator, Iterator
from hashlib import sha256
from itertools import starmap, product
from yaml import safe_dump

from attrs import evolve

from pytest import mark
from pytest_twisted import ensureDeferred

from .vectors import vectors
from .util import CHK, SSK, reconfigure, upload, TahoeProcess

def digest(bs: bytes) -> bytes:
    """
    Compute the raw SHA-256 digest of some bytes.

    :param bs: The bytes to hash.

    :return: The binary (not hex-encoded) digest.
    """
    hasher = sha256()
    hasher.update(bs)
    return hasher.digest()


def hexdigest(bs: bytes) -> str:
    """
    Compute the SHA-256 digest of some bytes as hexadecimal text.

    :param bs: The bytes to hash.

    :return: The digest, hex-encoded.
    """
    raw = sha256(bs).digest()
    return raw.hex()

# Just a couple convergence secrets.  The only thing we do with this value is
# feed it into a tagged hash.  It certainly makes a difference to the output
# but the hash should destroy any structure in the input so it doesn't seem
# like there's a reason to test a lot of different values.
#
# Every entry must be exactly 16 bytes; ``test_convergence`` checks this.
CONVERGENCE_SECRETS = [
    # A fixed, human-readable value.
    b"aaaaaaaaaaaaaaaa",
    # The first 16 bytes of the SHA-256 digest of b"Hello world".
    digest(b"Hello world")[:16],
]

# Exercise at least a handful of different sizes, trying to cover:
#
#  1. Some cases smaller than one "segment" (128k).
#     This covers shrinking of some parameters to match data size.
#     This includes one case of the smallest possible CHK.
#
#  2. Some cases right on the edges of integer segment multiples.
#     Because boundaries are tricky.
#
#  3. Some cases that involve quite a few segments.
#     This exercises merkle tree construction more thoroughly.
#
# See ``stretch`` for construction of the actual test data.

# The size of one segment, in bytes (128k).  This value is also recorded with
# every generated vector as the zfec ``segmentSize``.
SEGMENT_SIZE = 128 * 1024
# Descriptions of the sample data to upload.  The second argument of each
# sample is a size in bytes; presumably the first is the seed that is expanded
# to that size (TODO confirm against ``vectors.Sample``).
OBJECT_DESCRIPTIONS = [
    # The smallest possible.  55 bytes and smaller are LIT.
    vectors.Sample(b"a", 56),
    # Two more cases well below one segment.
    vectors.Sample(b"a", 1024),
    vectors.Sample(b"c", 4096),
    # One byte short of, and one byte past, a single segment.
    vectors.Sample(digest(b"foo"), SEGMENT_SIZE - 1),
    vectors.Sample(digest(b"bar"), SEGMENT_SIZE + 1),
    # One byte short of, and one byte past, 16 segments.
    vectors.Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
    vectors.Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
    # One byte short of, and one byte past, 64 segments.
    vectors.Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
    vectors.Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
]

# The erasure-coding parameters to generate vectors for.  Each entry is later
# read back through its ``required`` and ``total`` attributes.
# NOTE(review): the positional order is presumably (required, total) --
# confirm against ``vectors.SeedParam``.
ZFEC_PARAMS = [
    vectors.SeedParam(1, 1),
    vectors.SeedParam(1, 3),
    vectors.SeedParam(2, 3),
    vectors.SeedParam(3, 10),
    vectors.SeedParam(71, 255),
    # Uses the largest share count that ``vectors`` declares.
    vectors.SeedParam(101, vectors.MAX_SHARES),
]

# The data formats to generate vectors for.  ``generate`` calls
# ``customize()`` on the format of every case before uploading.
FORMATS = [
    CHK(),
    # These start out unaware of a key but various keys will be supplied
    # during generation.
    SSK(name="sdmf", key=None),
    SSK(name="mdmf", key=None),
]

@mark.parametrize('convergence', CONVERGENCE_SECRETS)
def test_convergence(convergence):
    """
    Convergence secrets are 16 bytes.

    This is a sanity check of the generator inputs themselves: every entry
    of ``CONVERGENCE_SECRETS`` must be a ``bytes`` value of length 16.
    """
    assert isinstance(convergence, bytes), "Convergence secret must be bytes"
    assert len(convergence) == 16, "Convergence secret must be 16 bytes"


@mark.parametrize('case_and_expected', vectors.capabilities.items())
@ensureDeferred
async def test_capability(reactor, request, alice, case_and_expected):
    """
    Uploading each well-known sample with its well-known parameters yields
    exactly the capability that was recorded when the vectors were generated.
    """
    case, expected = case_and_expected

    # Rewrite alice's config to match this case's encoding parameters and
    # convergence secret.  The first element ("happy") is fixed at 1.
    encoding = (1, case.params.required, case.params.total)
    await reconfigure(reactor, request, alice, encoding, case.convergence)

    # Upload the data in the format the case asks for ...
    cap = upload(alice, case.fmt, case.data)

    # ... and require the resulting cap to match the recorded one.
    assert cap == expected


@ensureDeferred
async def skiptest_generate(reactor, request, alice):
    """
    Helper that (re-)generates the test vectors.

    To re-generate the vectors, fix the name of this test so that it is
    collected and then run it.  It is normally not run: it was run once and
    its output was captured.  Other tests run against that output and should
    keep running against the originally produced results rather than a
    possibly ever-changing set of outputs.
    """
    # One case for every combination of encoding parameters, convergence
    # secret, sample description and format.
    combinations = product(
        ZFEC_PARAMS,
        CONVERGENCE_SECRETS,
        OBJECT_DESCRIPTIONS,
        FORMATS,
    )
    cases = starmap(vectors.Case, combinations)

    # Rewrite the output file each time another result becomes available.
    collected = []
    async for pair in generate(reactor, request, alice, cases):
        collected.append(pair)
        write_results(vectors.DATA_PATH, collected)

def write_results(path: FilePath, results: list[tuple[vectors.Case, str]]) -> None:
    """
    Save the given results.

    The results are serialized as a single YAML document holding the vector
    format version and one entry per result.  Each entry records the inputs
    of the case (convergence secret, format, sample description, zfec
    parameters) together with the expected capability.

    :param path: The object to write to.  Its ``setContent`` method is called
        once with the complete, ASCII-encoded document.

    :param results: Pairs of a case and the capability produced for it.
    """
    # NOTE(review): ``FilePath`` is not imported by this module.  The
    # annotation above is only harmless because annotations are evaluated
    # lazily (``from __future__ import annotations``).
    document = {
        "version": vectors.CURRENT_VERSION,
        "vector": [
            {
                "convergence": vectors.encode_bytes(case.convergence),
                "format": {
                    "kind": case.fmt.kind,
                    "params": case.fmt.to_json(),
                },
                "sample": {
                    "seed": vectors.encode_bytes(case.seed_data.seed),
                    "length": case.seed_data.length,
                },
                "zfec": {
                    "segmentSize": SEGMENT_SIZE,
                    "required": case.params.required,
                    "total": case.params.total,
                },
                "expected": cap,
            }
            for (case, cap)
            in results
        ],
    }
    path.setContent(safe_dump(document).encode("ascii"))

async def generate(
        reactor,
        request,
        alice: TahoeProcess,
        cases: Iterator[vectors.Case],
) -> AsyncGenerator[tuple[vectors.Case, str], None]:
    """
    Generate all of the test vectors using the given node.

    :param reactor: The reactor to use to restart the Tahoe-LAFS node when it
        needs to be reconfigured.

    :param request: The pytest request object to use to arrange process
        cleanup.

    :param alice: The Tahoe-LAFS node to use to generate the test vectors.

    :param cases: The inputs for which to generate values.  They are
        processed one at a time, in order.

    :return: An async generator of ``(case, capability)`` pairs, one per
        input case.  The yielded case carries the customized format that was
        actually used for the upload, which may differ from the input case's
        format.
    """
    # Share placement doesn't affect the resulting capability.  For maximum
    # reliability of this generator, be happy if we can put shares anywhere
    happy = 1
    for case in cases:
        # Reconfigure the node for this case's encoding parameters and
        # convergence secret before uploading.
        await reconfigure(
            reactor,
            request,
            alice,
            (happy, case.params.required, case.params.total),
            case.convergence
        )

        # Give the format a chance to make an RSA key if it needs it.
        case = evolve(case, fmt=case.fmt.customize())
        cap = upload(alice, case.fmt, case.data)
        yield case, cap
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`"""`
			`Verify certain results against test vectors with well-known results.`
			`"""`

Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`from __future__ import annotations`

Put the generator inputs into the output file This should make it easier for other implementations to use the test data, I think. Also put a version in there so we can change inputs in the future but still talk about results meaningfully. And some other minor refactoring 2022-12-26 17:06:34 +00:00			`from typing import AsyncGenerator, Iterator`
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`from hashlib import sha256`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`from itertools import starmap, product`
Get basic generation working, apparently 2022-12-22 15:51:59 +00:00			`from yaml import safe_dump`

reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`from attrs import evolve`

Get basic generation working, apparently 2022-12-22 15:51:59 +00:00			`from pytest import mark`
			`from pytest_twisted import ensureDeferred`
start of a test vector thingy 2022-12-21 22:14:08 +00:00
Move some more pieces into the subdirectory 2023-01-16 21:01:11 +00:00			`from .vectors import vectors`
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`from .util import CHK, SSK, reconfigure, upload, TahoeProcess`
Move some general utility functions into the util module 2022-12-22 22:02:42 +00:00
			`def digest(bs: bytes) -> bytes:`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`"""`
			`Digest bytes to bytes.`
			`"""`
Move some general utility functions into the util module 2022-12-22 22:02:42 +00:00			`return sha256(bs).digest()`

start of a test vector thingy 2022-12-21 22:14:08 +00:00
Move some general utility functions into the util module 2022-12-22 22:02:42 +00:00			`def hexdigest(bs: bytes) -> str:`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`"""`
			`Digest bytes to text.`
			`"""`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`return sha256(bs).hexdigest()`

Put the generator inputs into the output file This should make it easier for other implementations to use the test data, I think. Also put a version in there so we can change inputs in the future but still talk about results meaningfully. And some other minor refactoring 2022-12-26 17:06:34 +00:00			`# Just a couple convergence secrets. The only thing we do with this value is`
			`# feed it into a tagged hash. It certainly makes a difference to the output`
			`# but the hash should destroy any structure in the input so it doesn't seem`
			`# like there's a reason to test a lot of different values.`
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`CONVERGENCE_SECRETS = [`
			`b"aaaaaaaaaaaaaaaa",`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`digest(b"Hello world")[:16],`
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`]`

Put the generator inputs into the output file This should make it easier for other implementations to use the test data, I think. Also put a version in there so we can change inputs in the future but still talk about results meaningfully. And some other minor refactoring 2022-12-26 17:06:34 +00:00			`# Exercise at least a handful of different sizes, trying to cover:`
			`#`
			`# 1. Some cases smaller than one "segment" (128k).`
			`# This covers shrinking of some parameters to match data size.`
Re-generate vectors with a very small CHK 2023-01-14 02:14:37 +00:00			`# This includes one case of the smallest possible CHK.`
Put the generator inputs into the output file This should make it easier for other implementations to use the test data, I think. Also put a version in there so we can change inputs in the future but still talk about results meaningfully. And some other minor refactoring 2022-12-26 17:06:34 +00:00			`#`
			`# 2. Some cases right on the edges of integer segment multiples.`
			`# Because boundaries are tricky.`
			`#`
			`# 4. Some cases that involve quite a few segments.`
			`# This exercises merkle tree construction more thoroughly.`
			`#`
			# See ``stretch`` for construction of the actual test data.

			`SEGMENT_SIZE = 128 * 1024`
			`OBJECT_DESCRIPTIONS = [`
Re-generate vectors with a very small CHK 2023-01-14 02:14:37 +00:00			`# The smallest possible. 55 bytes and smaller are LIT.`
			`vectors.Sample(b"a", 56),`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`vectors.Sample(b"a", 1024),`
			`vectors.Sample(b"c", 4096),`
			`vectors.Sample(digest(b"foo"), SEGMENT_SIZE - 1),`
			`vectors.Sample(digest(b"bar"), SEGMENT_SIZE + 1),`
			`vectors.Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),`
			`vectors.Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),`
			`vectors.Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),`
			`vectors.Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),`
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`]`

			`ZFEC_PARAMS = [`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`vectors.SeedParam(1, 1),`
			`vectors.SeedParam(1, 3),`
			`vectors.SeedParam(2, 3),`
			`vectors.SeedParam(3, 10),`
			`vectors.SeedParam(71, 255),`
			`vectors.SeedParam(101, vectors.MAX_SHARES),`
Add SDMF and MDMF 2022-12-26 22:08:30 +00:00			`]`

			`FORMATS = [`
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`CHK(),`
			`# These start out unaware of a key but various keys will be supplied`
			`# during generation.`
			`SSK(name="sdmf", key=None),`
			`SSK(name="mdmf", key=None),`
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`]`

Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`@mark.parametrize('convergence', CONVERGENCE_SECRETS)`
			`def test_convergence(convergence):`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`"""`
			`Convergence secrets are 16 bytes.`
			`"""`
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`assert isinstance(convergence, bytes), "Convergence secret must be bytes"`
			`assert len(convergence) == 16, "Convergence secret must by 16 bytes"`


reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`@mark.parametrize('case_and_expected', vectors.capabilities.items())`
Get basic generation working, apparently 2022-12-22 15:51:59 +00:00			`@ensureDeferred`
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`async def test_capability(reactor, request, alice, case_and_expected):`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`"""`
Add SDMF and MDMF 2022-12-26 22:08:30 +00:00			`The capability that results from uploading certain well-known data`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`with certain well-known parameters results in exactly the previously`
			`computed value.`
			`"""`
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`case, expected = case_and_expected`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00
start of a test vector thingy 2022-12-21 22:14:08 +00:00			`# rewrite alice's config to match params and convergence`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)`
start of a test vector thingy 2022-12-21 22:14:08 +00:00
Add SDMF and MDMF 2022-12-26 22:08:30 +00:00			`# upload data in the correct format`
			`actual = upload(alice, case.fmt, case.data)`
start of a test vector thingy 2022-12-21 22:14:08 +00:00
			`# compare the resulting cap to the expected result`
			`assert actual == expected`

Get basic generation working, apparently 2022-12-22 15:51:59 +00:00
			`@ensureDeferred`
Move some more pieces into the subdirectory 2023-01-16 21:01:11 +00:00			`async def skiptest_generate(reactor, request, alice):`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`"""`
			`This is a helper for generating the test vectors.`

			`You can re-generate the test vectors by fixing the name of the test and`
			`running it. Normally this test doesn't run because it ran once and we`
			`captured its output. Other tests run against that output and we want them`
			`to run against the results produced originally, not a possibly`
			`ever-changing set of outputs.`
			`"""`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`space = starmap(vectors.Case, product(`
			`ZFEC_PARAMS,`
			`CONVERGENCE_SECRETS,`
			`OBJECT_DESCRIPTIONS,`
			`FORMATS,`
			`))`
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`iterresults = generate(reactor, request, alice, space)`

			`# Update the output file with results as they become available.`
			`results = []`
			`async for result in iterresults:`
			`results.append(result)`
			`write_results(vectors.DATA_PATH, results)`

			`def write_results(path: FilePath, results: list[tuple[Case, str]]) -> None:`
			`"""`
			`Save the given results.`
			`"""`
			`path.setContent(safe_dump({`
			`"version": vectors.CURRENT_VERSION,`
write the data file more safely 2023-01-12 21:56:20 +00:00			`"vector": [`
			`{`
			`"convergence": vectors.encode_bytes(case.convergence),`
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`"format": {`
			`"kind": case.fmt.kind,`
			`"params": case.fmt.to_json(),`
			`},`
write the data file more safely 2023-01-12 21:56:20 +00:00			`"sample": {`
			`"seed": vectors.encode_bytes(case.seed_data.seed),`
			`"length": case.seed_data.length,`
			`},`
			`"zfec": {`
			`"segmentSize": SEGMENT_SIZE,`
			`"required": case.params.required,`
			`"total": case.params.total,`
			`},`
			`"expected": cap,`
			`}`
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`for (case, cap)`
write the data file more safely 2023-01-12 21:56:20 +00:00			`in results`
			`],`
Switch to FilePath, regenerate w/o "max" 2023-01-12 22:27:37 +00:00			`}).encode("ascii"))`
Put the generator inputs into the output file This should make it easier for other implementations to use the test data, I think. Also put a version in there so we can change inputs in the future but still talk about results meaningfully. And some other minor refactoring 2022-12-26 17:06:34 +00:00
			`async def generate(`
			`reactor,`
			`request,`
			`alice: TahoeProcess,`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`cases: Iterator[vectors.Case],`
			`) -> AsyncGenerator[[vectors.Case, str], None]:`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`"""`
			`Generate all of the test vectors using the given node.`

			`:param reactor: The reactor to use to restart the Tahoe-LAFS node when it`
			`needs to be reconfigured.`

			`:param request: The pytest request object to use to arrange process`
			`cleanup.`

Add SDMF and MDMF 2022-12-26 22:08:30 +00:00			`:param format: The name of the encryption/data format to use.`

Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`:param alice: The Tahoe-LAFS node to use to generate the test vectors.`

Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`:param case: The inputs for which to generate a value.`
Add SDMF and MDMF 2022-12-26 22:08:30 +00:00
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`:return: The capability for the case.`
Comments and minor factoring improvements and such 2022-12-22 21:52:00 +00:00			`"""`
Put the generator inputs into the output file This should make it easier for other implementations to use the test data, I think. Also put a version in there so we can change inputs in the future but still talk about results meaningfully. And some other minor refactoring 2022-12-26 17:06:34 +00:00			`# Share placement doesn't affect the resulting capability. For maximum`
clarify what reliability we hope for 2022-12-27 14:03:24 +00:00			`# reliability of this generator, be happy if we can put shares anywhere`
Put the generator inputs into the output file This should make it easier for other implementations to use the test data, I think. Also put a version in there so we can change inputs in the future but still talk about results meaningfully. And some other minor refactoring 2022-12-26 17:06:34 +00:00			`happy = 1`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`for case in cases:`
			`await reconfigure(`
			`reactor,`
			`request,`
			`alice,`
			`(happy, case.params.required, case.params.total),`
			`case.convergence`
			`)`
start of a test vector thingy 2022-12-21 22:14:08 +00:00
reproducible ssk vectors 2023-01-16 20:53:24 +00:00			`# Give the format a chance to make an RSA key if it needs it.`
			`case = evolve(case, fmt=case.fmt.customize())`
Add SDMF and MDMF 2022-12-26 22:08:30 +00:00			`cap = upload(alice, case.fmt, case.data)`
Generate and consumer the new structure properly 2023-01-04 00:22:38 +00:00			`yield case, cap`