Merge pull request #1339 from tahoe-lafs/4065-try-to-speed-up-first-benchmark

Try (and maybe kinda sorta succeed?) to speed up first benchmark

Fixes ticket:4065
Itamar Turner-Trauring 2023-10-16 13:15:43 -04:00 committed by GitHub
commit 5d0a619572
5 changed files with 77 additions and 31 deletions


@@ -1,8 +1,12 @@
"""pytest-based end-to-end benchmarks of Tahoe-LAFS.
"""
pytest-based end-to-end benchmarks of Tahoe-LAFS.

Usage:

    $ pytest benchmark --number-of-nodes=3
    $ systemd-run --user --scope pytest benchmark --number-of-nodes=3

It's possible to pass --number-of-nodes multiple times.

The systemd-run makes sure the tests run in their own cgroup so we get CPU
accounting correct.
"""


@@ -5,6 +5,7 @@ The number of nodes is parameterized via a --number-of-nodes CLI option added
to pytest.
"""

import os
from shutil import which, rmtree
from tempfile import mkdtemp
from contextlib import contextmanager

@@ -106,19 +107,42 @@ def client_node(request, grid, storage_nodes, number_of_nodes) -> Client:
    print(f"Client node pid: {client_node.process.transport.pid}")
    return client_node

def get_cpu_time_for_cgroup():
    """
    Get how many CPU seconds have been used in current cgroup so far.

    Assumes we're running in a v2 cgroup.
    """
    with open("/proc/self/cgroup") as f:
        cgroup = f.read().strip().split(":")[-1]
    assert cgroup.startswith("/")
    cgroup = cgroup[1:]
    cpu_stat = os.path.join("/sys/fs/cgroup", cgroup, "cpu.stat")
    with open(cpu_stat) as f:
        for line in f.read().splitlines():
            if line.startswith("usage_usec"):
                return int(line.split()[1]) / 1_000_000
    raise ValueError("Failed to find usage_usec")
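For reference, a small sketch (illustration only, not from the commit) of the parsing step above run against sample cpu.stat contents; the field names and microsecond units follow the cgroup v2 cpu.stat format.

# Hypothetical sample, mirroring the cgroup v2 cpu.stat layout the helper parses.
SAMPLE_CPU_STAT = """\
usage_usec 2534217
user_usec 1894332
system_usec 639885
"""


def parse_usage_seconds(text: str) -> float:
    # Same extraction logic as get_cpu_time_for_cgroup(), minus the file I/O.
    for line in text.splitlines():
        if line.startswith("usage_usec"):
            return int(line.split()[1]) / 1_000_000  # microseconds -> seconds
    raise ValueError("Failed to find usage_usec")


print(parse_usage_seconds(SAMPLE_CPU_STAT))  # 2.534217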
class Benchmarker:
    """Keep track of benchmarking results."""

    @contextmanager
    def record(self, name, **parameters):
    def record(self, capsys: pytest.CaptureFixture[str], name, **parameters):
        """Record the timing of running some code, if it succeeds."""
        start_cpu = get_cpu_time_for_cgroup()
        start = time()
        yield
        elapsed = time() - start
        # For now we just print the outcome:
        end_cpu = get_cpu_time_for_cgroup()
        elapsed_cpu = end_cpu - start_cpu
        # For now we just print the outcome:
        parameters = " ".join(f"{k}={v}" for (k, v) in parameters.items())
        print(f"BENCHMARK RESULT: {name} {parameters} elapsed {elapsed} secs")
        with capsys.disabled():
            print(
                f"\nBENCHMARK RESULT: {name} {parameters} elapsed={elapsed:.3} (secs) CPU={elapsed_cpu:.3} (secs)\n"
            )
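A minimal usage sketch (illustrative only; it assumes the tahoe_benchmarker fixture defined in this conftest plus pytest's built-in capsys fixture) showing the new call shape: capsys is passed first so record() can temporarily disable output capture when printing the result line.

# Hypothetical test, not part of this commit, showing how record() is invoked.
def test_example_benchmark(tahoe_benchmarker, capsys):
    with tahoe_benchmarker.record(capsys, "example-operation", file_size=1000):
        sum(range(1_000_000))  # stand-in for the work being measured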
@pytest.fixture(scope="session")


@@ -7,42 +7,60 @@ import pytest

from integration.util import cli


@pytest.fixture(scope="session")
@pytest.fixture(scope="module", autouse=True)
def cli_alias(client_node):
    cli(client_node.process, "create-alias", "cli")


def test_get_put_one_file(
    client_node, cli_alias, tmp_path, tahoe_benchmarker, number_of_nodes
@pytest.mark.parametrize("file_size", [1000, 100_000, 1_000_000, 10_000_000])
def test_get_put_files_sequentially(
    file_size,
    client_node,
    tahoe_benchmarker,
    number_of_nodes,
    capsys,
):
    """
    Upload a file with ``tahoe put`` and then download it with ``tahoe get``,
    measuring the latency of both operations.
    Upload 5 files with ``tahoe put`` and then download them with ``tahoe
    get``, measuring the latency of both operations. We do multiple uploads
    and downloads to try to reduce noise.
    """
    file_size = 1000 # parameterize later on
    file_path = tmp_path / "file"
    DATA = b"0123456789" * (file_size // 10)
    with file_path.open("wb") as f:
        f.write(DATA)

    with tahoe_benchmarker.record(
        "cli-put-file", file_size=file_size, number_of_nodes=number_of_nodes
        capsys, "cli-put-5-file-sequentially", file_size=file_size, number_of_nodes=number_of_nodes
    ):
        cli(client_node.process, "put", str(file_path), "cli:tostdout")
        for i in range(5):
            p = Popen(
                [
                    "tahoe",
                    "--node-directory",
                    client_node.process.node_dir,
                    "put",
                    "-",
                    f"cli:get_put_files_sequentially{i}",
                ],
                stdin=PIPE,
            )
            p.stdin.write(DATA)
            p.stdin.write(str(i).encode("ascii"))
            p.stdin.close()
            assert p.wait() == 0

    with tahoe_benchmarker.record(
        "cli-get-file", file_size=file_size, number_of_nodes=number_of_nodes
        capsys, "cli-get-5-files-sequentially", file_size=file_size, number_of_nodes=number_of_nodes
    ):
        p = Popen(
            [
                "tahoe",
                "--node-directory",
                client_node.process.node_dir,
                "get",
                "cli:tostdout",
                "-",
            ],
            stdout=PIPE,
        )
        assert p.stdout.read() == DATA
        assert p.wait() == 0
        for i in range(5):
            p = Popen(
                [
                    "tahoe",
                    "--node-directory",
                    client_node.process.node_dir,
                    "get",
                    f"cli:get_put_files_sequentially{i}",
                    "-",
                ],
                stdout=PIPE,
            )
            assert p.stdout.read() == DATA + str(i).encode("ascii")
            assert p.wait() == 0

newsfragments/4065.minor Normal file


@@ -435,7 +435,7 @@ setup(name="tahoe-lafs", # also set in __init__.py
            "paramiko < 2.9",
            "pytest-timeout",
            # Does our OpenMetrics endpoint adhere to the spec:
            "prometheus-client == 0.11.0"
            "prometheus-client == 0.11.0",
        ] + tor_requires + i2p_requires,
        "tor": tor_requires,
        "i2p": i2p_requires,