Merge pull request #1339 from tahoe-lafs/4065-try-to-speed-up-first-benchmark

Try (and maybe kinda sorta succeed?) to speed up first benchmark

Fixes ticket:4065
Itamar Turner-Trauring 2023-10-16 13:15:43 -04:00 committed by GitHub
commit 5d0a619572
5 changed files with 77 additions and 31 deletions


@@ -1,8 +1,12 @@
"""pytest-based end-to-end benchmarks of Tahoe-LAFS.
"""
pytest-based end-to-end benchmarks of Tahoe-LAFS.

Usage:

    $ pytest benchmark --number-of-nodes=3
    $ systemd-run --user --scope pytest benchmark --number-of-nodes=3

It's possible to pass --number-of-nodes multiple times.

The systemd-run makes sure the tests run in their own cgroup so we get CPU
accounting correct.
"""


@@ -5,6 +5,7 @@ The number of nodes is parameterized via a --number-of-nodes CLI option added
to pytest.
"""

import os
from shutil import which, rmtree
from tempfile import mkdtemp
from contextlib import contextmanager

@@ -106,19 +107,42 @@ def client_node(request, grid, storage_nodes, number_of_nodes) -> Client:
    print(f"Client node pid: {client_node.process.transport.pid}")
    return client_node

def get_cpu_time_for_cgroup():
    """
    Get how many CPU seconds have been used in current cgroup so far.

    Assumes we're running in a v2 cgroup.
    """
    with open("/proc/self/cgroup") as f:
        cgroup = f.read().strip().split(":")[-1]
    assert cgroup.startswith("/")
    cgroup = cgroup[1:]
    cpu_stat = os.path.join("/sys/fs/cgroup", cgroup, "cpu.stat")
    with open(cpu_stat) as f:
        for line in f.read().splitlines():
            if line.startswith("usage_usec"):
                return int(line.split()[1]) / 1_000_000
    raise ValueError("Failed to find usage_usec")
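For reference, a small sketch (illustration only, not from the commit) of the parsing step above run against sample cpu.stat contents; the field names and microsecond units follow the cgroup v2 cpu.stat format.

# Hypothetical sample, mirroring the cgroup v2 cpu.stat layout the helper parses.
SAMPLE_CPU_STAT = """\
usage_usec 2534217
user_usec 1894332
system_usec 639885
"""


def parse_usage_seconds(text: str) -> float:
    # Same extraction logic as get_cpu_time_for_cgroup(), minus the file I/O.
    for line in text.splitlines():
        if line.startswith("usage_usec"):
            return int(line.split()[1]) / 1_000_000  # microseconds -> seconds
    raise ValueError("Failed to find usage_usec")


print(parse_usage_seconds(SAMPLE_CPU_STAT))  # 2.534217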
class Benchmarker:
    """Keep track of benchmarking results."""

    @contextmanager
    def record(self, name, **parameters):
    def record(self, capsys: pytest.CaptureFixture[str], name, **parameters):
        """Record the timing of running some code, if it succeeds."""
        start_cpu = get_cpu_time_for_cgroup()
        start = time()
        yield
        elapsed = time() - start
        # For now we just print the outcome:
        end_cpu = get_cpu_time_for_cgroup()
        elapsed_cpu = end_cpu - start_cpu
        # For now we just print the outcome:
        parameters = " ".join(f"{k}={v}" for (k, v) in parameters.items())
        print(f"BENCHMARK RESULT: {name} {parameters} elapsed {elapsed} secs")
        with capsys.disabled():
            print(
                f"\nBENCHMARK RESULT: {name} {parameters} elapsed={elapsed:.3} (secs) CPU={elapsed_cpu:.3} (secs)\n"
            )
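A minimal usage sketch (illustrative only; it assumes the tahoe_benchmarker fixture defined in this conftest plus pytest's built-in capsys fixture) showing the new call shape: capsys is passed first so record() can temporarily disable output capture when printing the result line.

# Hypothetical test, not part of this commit, showing how record() is invoked.
def test_example_benchmark(tahoe_benchmarker, capsys):
    with tahoe_benchmarker.record(capsys, "example-operation", file_size=1000):
        sum(range(1_000_000))  # stand-in for the work being measured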
@pytest.fixture(scope="session")


@@ -7,42 +7,60 @@ import pytest

from integration.util import cli


@pytest.fixture(scope="session")
@pytest.fixture(scope="module", autouse=True)
def cli_alias(client_node):
    cli(client_node.process, "create-alias", "cli")


def test_get_put_one_file(
    client_node, cli_alias, tmp_path, tahoe_benchmarker, number_of_nodes
@pytest.mark.parametrize("file_size", [1000, 100_000, 1_000_000, 10_000_000])
def test_get_put_files_sequentially(
    file_size,
    client_node,
    tahoe_benchmarker,
    number_of_nodes,
    capsys,
):
    """
    Upload a file with ``tahoe put`` and then download it with ``tahoe get``,
    measuring the latency of both operations.
    Upload 5 files with ``tahoe put`` and then download them with ``tahoe
    get``, measuring the latency of both operations. We do multiple uploads
    and downloads to try to reduce noise.
    """
    file_size = 1000 # parameterize later on
    file_path = tmp_path / "file"
    DATA = b"0123456789" * (file_size // 10)
    with file_path.open("wb") as f:
        f.write(DATA)

    with tahoe_benchmarker.record(
        "cli-put-file", file_size=file_size, number_of_nodes=number_of_nodes
        capsys, "cli-put-5-file-sequentially", file_size=file_size, number_of_nodes=number_of_nodes
    ):
        cli(client_node.process, "put", str(file_path), "cli:tostdout")
        for i in range(5):
            p = Popen(
                [
                    "tahoe",
                    "--node-directory",
                    client_node.process.node_dir,
                    "put",
                    "-",
                    f"cli:get_put_files_sequentially{i}",
                ],
                stdin=PIPE,
            )
            p.stdin.write(DATA)
            p.stdin.write(str(i).encode("ascii"))
            p.stdin.close()
            assert p.wait() == 0

    with tahoe_benchmarker.record(
        "cli-get-file", file_size=file_size, number_of_nodes=number_of_nodes
        capsys, "cli-get-5-files-sequentially", file_size=file_size, number_of_nodes=number_of_nodes
    ):
        p = Popen(
            [
                "tahoe",
                "--node-directory",
                client_node.process.node_dir,
                "get",
                "cli:tostdout",
                "-",
            ],
            stdout=PIPE,
        )
        assert p.stdout.read() == DATA
        assert p.wait() == 0
        for i in range(5):
            p = Popen(
                [
                    "tahoe",
                    "--node-directory",
                    client_node.process.node_dir,
                    "get",
                    f"cli:get_put_files_sequentially{i}",
                    "-",
                ],
                stdout=PIPE,
            )
            assert p.stdout.read() == DATA + str(i).encode("ascii")
            assert p.wait() == 0

newsfragments/4065.minor Normal file


@@ -435,7 +435,7 @@ setup(name="tahoe-lafs", # also set in __init__.py
            "paramiko < 2.9",
            "pytest-timeout",
            # Does our OpenMetrics endpoint adhere to the spec:
            "prometheus-client == 0.11.0"
            "prometheus-client == 0.11.0",
        ] + tor_requires + i2p_requires,
        "tor": tor_requires,
        "i2p": i2p_requires,