Mirror of https://github.com/tahoe-lafs/tahoe-lafs.git
Merge pull request #1337 from tahoe-lafs/4060-my-first-benchmark
A very first benchmark. Fixes ticket:4060.
Commit: 20c85a86a5
benchmarks/__init__.py (new file, 8 lines)
@@ -0,0 +1,8 @@
"""pytest-based end-to-end benchmarks of Tahoe-LAFS.

Usage:

$ pytest benchmarks --number-of-nodes=3

It's possible to pass --number-of-nodes multiple times.
"""
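Since --number-of-nodes uses an "append" action (see conftest.py below), giving it more than once should run every benchmark once per node count; an illustrative invocation, with made-up counts:

$ pytest benchmarks --number-of-nodes=1 --number-of-nodes=5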
benchmarks/conftest.py (new file, 126 lines)
@@ -0,0 +1,126 @@
"""
pytest infrastructure for benchmarks.

The number of nodes is parameterized via a --number-of-nodes CLI option added
to pytest.
"""

from shutil import which, rmtree
from tempfile import mkdtemp
from contextlib import contextmanager
from time import time

import pytest
import pytest_twisted

from twisted.internet import reactor
from twisted.internet.defer import DeferredList, succeed

from allmydata.util.iputil import allocate_tcp_port

from integration.grid import Client, create_grid, create_flog_gatherer


def pytest_addoption(parser):
    parser.addoption(
        "--number-of-nodes",
        action="append",
        default=[],
        type=int,
        help="list of number_of_nodes to benchmark against",
    )
    # Required to be compatible with integration.util code that we indirectly
    # depend on, but also might be useful.
    parser.addoption(
        "--force-foolscap",
        action="store_true",
        default=False,
        dest="force_foolscap",
        help=(
            "If set, force Foolscap only for the storage protocol. "
            + "Otherwise HTTP will be used."
        ),
    )


def pytest_generate_tests(metafunc):
    # Make number_of_nodes accessible as a parameterized fixture:
    if "number_of_nodes" in metafunc.fixturenames:
        metafunc.parametrize(
            "number_of_nodes",
            metafunc.config.getoption("number_of_nodes"),
            scope="session",
        )


def port_allocator():
    port = allocate_tcp_port()
    return succeed(port)


@pytest.fixture(scope="session")
def grid(request):
    """
    Provides a new Grid with a single Introducer and flog-gathering process.

    Notably does _not_ provide storage servers; use the storage_nodes
    fixture if your tests need a Grid that can be used for puts / gets.
    """
    tmp_path = mkdtemp(prefix="tahoe-benchmark")
    request.addfinalizer(lambda: rmtree(tmp_path))
    flog_binary = which("flogtool")
    flog_gatherer = pytest_twisted.blockon(
        create_flog_gatherer(reactor, request, tmp_path, flog_binary)
    )
    g = pytest_twisted.blockon(
        create_grid(reactor, request, tmp_path, flog_gatherer, port_allocator)
    )
    return g


@pytest.fixture(scope="session")
def storage_nodes(grid, number_of_nodes):
    nodes_d = []
    for _ in range(number_of_nodes):
        nodes_d.append(grid.add_storage_node())

    nodes_status = pytest_twisted.blockon(DeferredList(nodes_d))
    for ok, value in nodes_status:
        assert ok, "Storage node creation failed: {}".format(value)
    return grid.storage_servers


@pytest.fixture(scope="session")
def client_node(request, grid, storage_nodes, number_of_nodes) -> Client:
    """
    Create a grid client node with number of shares matching number of nodes.
    """
    client_node = pytest_twisted.blockon(
        grid.add_client(
            "client_node",
            needed=number_of_nodes,
            happy=number_of_nodes,
            total=number_of_nodes,
        )
    )
    print(f"Client node pid: {client_node.process.transport.pid}")
    return client_node


class Benchmarker:
    """Keep track of benchmarking results."""

    @contextmanager
    def record(self, name, **parameters):
        """Record the timing of running some code, if it succeeds."""
        start = time()
        yield
        elapsed = time() - start
        # For now we just print the outcome:
        parameters = " ".join(f"{k}={v}" for (k, v) in parameters.items())
        print(f"BENCHMARK RESULT: {name} {parameters} elapsed {elapsed} secs")


@pytest.fixture(scope="session")
def tahoe_benchmarker():
    return Benchmarker()
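Reading the fixtures above together: pytest_generate_tests parameterizes the session-scoped number_of_nodes argument with every value collected by --number-of-nodes, so each node count passed on the command line gets its own parametrization of the storage_nodes and client_node fixtures. Benchmarker.record prints one line per measured operation; judging from its f-string, the output should look roughly like the following (operation name and values are illustrative, borrowed from test_cli.py below):

BENCHMARK RESULT: cli-put-file file_size=1000 number_of_nodes=3 elapsed 1.234 secs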
benchmarks/test_cli.py (new file, 48 lines)
@@ -0,0 +1,48 @@
"""Benchmarks for minimal `tahoe` CLI interactions."""

from subprocess import Popen, PIPE

import pytest

from integration.util import cli


@pytest.fixture(scope="session")
def cli_alias(client_node):
    cli(client_node.process, "create-alias", "cli")


def test_get_put_one_file(
    client_node, cli_alias, tmp_path, tahoe_benchmarker, number_of_nodes
):
    """
    Upload a file with ``tahoe put`` and then download it with ``tahoe get``,
    measuring the latency of both operations.
    """
    file_size = 1000  # parameterize later on
    file_path = tmp_path / "file"
    DATA = b"0123456789" * (file_size // 10)
    with file_path.open("wb") as f:
        f.write(DATA)

    with tahoe_benchmarker.record(
        "cli-put-file", file_size=file_size, number_of_nodes=number_of_nodes
    ):
        cli(client_node.process, "put", str(file_path), "cli:tostdout")

    with tahoe_benchmarker.record(
        "cli-get-file", file_size=file_size, number_of_nodes=number_of_nodes
    ):
        p = Popen(
            [
                "tahoe",
                "--node-directory",
                client_node.process.node_dir,
                "get",
                "cli:tostdout",
                "-",
            ],
            stdout=PIPE,
        )
        assert p.stdout.read() == DATA
        assert p.wait() == 0
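To run just this benchmark and see the recorded timings, an invocation along these lines should work (-s stops pytest from capturing the printed BENCHMARK RESULT lines; the node count is illustrative):

$ pytest benchmarks/test_cli.py --number-of-nodes=3 -s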
benchmarks/upload_download.py (deleted, 138 lines)
@@ -1,138 +0,0 @@
"""
First attempt at benchmarking uploads and downloads.

To run:

$ pytest benchmarks/upload_download.py -s -v -Wignore

To add latency of e.g. 60ms on Linux (netem adds 30ms in each direction on the
loopback device):

$ tc qdisc add dev lo root netem delay 30ms

To reset:

$ tc qdisc del dev lo root netem

Frequency scaling can spoil the results.
To see the range of frequency scaling on a Linux system:

$ cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_available_frequencies

And to pin the CPU frequency to the lower bound found in these files:

$ sudo cpupower frequency-set -f <lowest available frequency>

TODO Parameterization (pytest?)

- Foolscap vs not foolscap

- Number of nodes

- Data size

- Number of needed/happy/total shares.

CAVEATS: The goal here isn't a realistic benchmark, or a benchmark that will be
measured over time, or is expected to be maintainable over time. This is just
a quick and easy way to measure the speed of certain operations, compare HTTP
and Foolscap, and see the short-term impact of changes.

Eventually this will be replaced by a real benchmark suite that can be run over
time to measure something more meaningful.
"""

from time import time, process_time
from contextlib import contextmanager
from tempfile import mkdtemp
import os

from twisted.trial.unittest import TestCase
from twisted.internet.defer import gatherResults

from allmydata.util.deferredutil import async_to_deferred
from allmydata.util.consumer import MemoryConsumer
from allmydata.test.common_system import SystemTestMixin
from allmydata.immutable.upload import Data as UData
from allmydata.mutable.publish import MutableData


@contextmanager
def timeit(name):
    start = time()
    start_cpu = process_time()
    try:
        yield
    finally:
        print(
            f"{name}: {time() - start:.3f} elapsed, {process_time() - start_cpu:.3f} CPU"
        )


class ImmutableBenchmarks(SystemTestMixin, TestCase):
    """Benchmarks for immutables."""

    # To use Foolscap, change to True:
    FORCE_FOOLSCAP_FOR_STORAGE = False

    # Don't reduce HTTP connection timeouts, that messes up the more aggressive
    # benchmarks:
    REDUCE_HTTP_CLIENT_TIMEOUT = False

    @async_to_deferred
    async def setUp(self):
        SystemTestMixin.setUp(self)
        self.basedir = os.path.join(mkdtemp(), "nodes")

        # 2 nodes
        await self.set_up_nodes(2)

        # 1 share
        for c in self.clients:
            c.encoding_params["k"] = 1
            c.encoding_params["happy"] = 1
            c.encoding_params["n"] = 1

        print()

    @async_to_deferred
    async def test_upload_and_download_immutable(self):
        # To test larger files, change this:
        DATA = b"Some data to upload\n" * 10

        for i in range(5):
            # 1. Upload:
            with timeit(" upload"):
                uploader = self.clients[0].getServiceNamed("uploader")
                results = await uploader.upload(UData(DATA, convergence=None))

            # 2. Download:
            with timeit("download"):
                uri = results.get_uri()
                node = self.clients[1].create_node_from_uri(uri)
                mc = await node.read(MemoryConsumer(), 0, None)
                self.assertEqual(b"".join(mc.chunks), DATA)

    @async_to_deferred
    async def test_upload_and_download_mutable(self):
        # To test larger files, change this:
        DATA = b"Some data to upload\n" * 10

        for i in range(5):
            # 1. Upload:
            with timeit(" upload"):
                result = await self.clients[0].create_mutable_file(MutableData(DATA))

            # 2. Download:
            with timeit("download"):
                data = await result.download_best_version()
                self.assertEqual(data, DATA)

    @async_to_deferred
    async def test_upload_mutable_in_parallel(self):
        # To test larger files, change this:
        DATA = b"Some data to upload\n" * 1_000_000
        with timeit(" upload"):
            await gatherResults([
                self.clients[0].create_mutable_file(MutableData(DATA))
                for _ in range(20)
            ])
integration/util.py
@@ -240,7 +240,7 @@ def _tahoe_runner_optional_coverage(proto, reactor, request, other_args):
     allmydata.scripts.runner` and `other_args`, optionally inserting a
     `--coverage` option if the `request` indicates we should.
     """
-    if request.config.getoption('coverage'):
+    if request.config.getoption('coverage', False):
         args = [sys.executable, '-b', '-m', 'coverage', 'run', '-m', 'allmydata.scripts.runner', '--coverage']
     else:
         args = [sys.executable, '-b', '-m', 'allmydata.scripts.runner']
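A hedged reading of why the default was added: pytest's config.getoption() raises ValueError for an option name that no conftest or plugin registered, unless a default is supplied. The benchmarks' conftest.py above registers --number-of-nodes and --force-foolscap but not --coverage, so the explicit False default lets this shared helper run under the benchmark suite as well. A minimal sketch of the difference (illustrative only):

    # raises ValueError when no conftest/plugin registered --coverage
    request.config.getoption("coverage")
    # returns the supplied default (False) when the option is unknown
    request.config.getoption("coverage", False)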
newsfragments/4060.feature (new file, 1 line)
@@ -0,0 +1 @@
Started work on a new end-to-end benchmarking framework.
tox.ini (2 changed lines)
@@ -109,7 +109,7 @@ passenv = HOME
 setenv =
     # If no positional arguments are given, try to run the checks on the
     # entire codebase, including various pieces of supporting code.
-    DEFAULT_FILES=src integration static misc setup.py
+    DEFAULT_FILES=src integration benchmarks static misc setup.py
 commands =
     ruff check {posargs:{env:DEFAULT_FILES}}
     python misc/coding_tools/check-umids.py {posargs:{env:DEFAULT_FILES}}