Allow config of all experiment params, average across runs

Author: Chris Ball
Date: 2023-09-03 06:14:16 -07:00
parent 8e8acd0a04
commit 91938d2dfc

@@ -1,161 +1,197 @@
#!/usr/bin/env python3
# Part of the aflplusplus project, requires Python 3.7+.
# Author: Chris Ball <chris@printf.net>, ported from Marc "van Hauser" Heuse's "benchmark.sh".
import argparse
import asyncio
import json
import multiprocessing
import os
import platform
import shutil
import sys
import time
from dataclasses import dataclass
from decimal import Decimal
from enum import Enum, auto
from pathlib import Path

blue = lambda text: f"\033[1;94m{text}\033[0m"; gray = lambda text: f"\033[1;90m{text}\033[0m"
green = lambda text: f"\033[0;32m{text}\033[0m"; red = lambda text: f"\033[0;31m{text}\033[0m"
yellow = lambda text: f"\033[0;33m{text}\033[0m"

class Mode(Enum):
    multicore = auto()
    singlecore = auto()

@dataclass
class Target:
    source: Path
    binary: str

all_modes = [Mode.singlecore, Mode.multicore]
all_targets = [
    Target(source=Path("../utils/persistent_mode/test-instr.c").resolve(), binary="test-instr-persist-shmem"),
    Target(source=Path("../test-instr.c").resolve(), binary="test-instr")
]
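# Note: "test-instr-persist-shmem" exercises the persistent-mode/shared-memory harness from utils/persistent_mode,
# while "test-instr" is the plain (non-persistent) fork-server build of the test program.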
modes = ["single-core", "multi-core"] mode_names = [mode.name for mode in all_modes]
tree = lambda: defaultdict(tree) # recursive (arbitrary-depth) defaultdict! target_names = [target.binary for target in all_targets]
results = tree() cpu_count = multiprocessing.cpu_count()
between_tests = False
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-d", "--debug", action="store_true") parser.add_argument("-b", "--basedir", help="directory to use for temp files", type=str, default="/tmp/aflpp-benchmark")
parser.add_argument("-d", "--debug", help="show verbose debugging output", action="store_true")
parser.add_argument("-r", "--runs", help="how many runs to average results over", type=int, default=5)
parser.add_argument("-f", "--fuzzers", help="how many afl-fuzz workers to use", type=int, default=cpu_count)
parser.add_argument("-m", "--mode", help="pick modes", action="append", default=["multicore"], choices=mode_names)
parser.add_argument(
"-t", "--target", help="pick targets", action="append", default=["test-instr-persist-shmem"], choices=target_names
)
args = parser.parse_args() args = parser.parse_args()
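# For example, a hypothetical invocation averaging 3 runs of 8 fuzzers against the persistent-mode target:
#   ./benchmark.py -r 3 -f 8 -m multicore -t test-instr-persist-shmem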

# Really unsatisfying argparse behavior: we want a default and to allow multiple choices, but if there's a manual choice
# it should override the default. Seems like we have to remove the default to get that and have correct help text?
if len(args.target) > 1: args.target = args.target[1:]
if len(args.mode) > 1: args.mode = args.mode[1:]
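# (With action="append" and a list default, argparse appends explicit choices to the default, e.g. "-m singlecore"
# yields args.mode == ["multicore", "singlecore"]; the slicing above drops the default once a choice was made.)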

targets = [target for target in all_targets if target.binary in args.target]
modes = [mode for mode in all_modes if mode.name in args.mode]
results = {"config": {}, "hardware": {}, "targets": {t.binary: {m.name: {} for m in modes} for t in targets}}
debug = lambda text: args.debug and print(blue(text))
if Mode.multicore in modes:
    print(blue(f" [*] Using {args.fuzzers} fuzzers for multicore fuzzing "), end="")
    print(blue("(use --fuzzers to override)" if args.fuzzers == cpu_count else f"(the default is {cpu_count})"))

async def clean_up_tempfiles() -> None:
    shutil.rmtree(f"{args.basedir}/in")
    for target in targets:
        Path(target.binary).unlink()
        for mode in modes:
            shutil.rmtree(f"{args.basedir}/out-{mode.name}-{target.binary}")

async def check_afl_persistent() -> bool:
    with open("/proc/cmdline", "r") as cmdline:
        return "mitigations=off" in cmdline.read().split(" ")

async def check_afl_system() -> bool:
    sysctl = next((s for s in ["sysctl", "/sbin/sysctl"] if shutil.which(s)), None)
    if sysctl:
        (returncode, stdout, _) = await run_command([sysctl, "kernel.randomize_va_space"], None)
        return returncode == 0 and stdout.decode().rstrip().split(" = ")[1] == "0"
    return False

async def check_deps() -> None:
    """Checks for dependencies, platform, performance."""
    plat = platform.system()
    if not plat == "Linux": sys.exit(red(f" [*] Error: Your platform '{plat}' is not supported by this script yet."))
    if not (os.access(Path("../afl-fuzz").resolve(), os.X_OK) and os.access(Path("../afl-cc").resolve(), os.X_OK)
            and os.path.exists(Path("../SanitizerCoveragePCGUARD.so").resolve())):
        sys.exit(red(" [*] Compile AFL++: we need afl-fuzz, afl-clang-fast and SanitizerCoveragePCGUARD.so built."))

    # Pick some sample settings from afl-{persistent,system}-config to try to see whether they were run.
    cmd_checks = {"afl-persistent-config": check_afl_persistent, "afl-system-config": check_afl_system}
    for cmd, checker in cmd_checks.items():
        results["config"][cmd] = await checker()
        if not results["config"][cmd]:
            print(yellow(f" [*] {cmd} was not run. You can run it to improve performance (and decrease security)."))

async def prep_env() -> dict:
    """Unset AFL_* environment variables, create corpus dir and file, provide env vars for fuzzing."""
    Path(args.basedir).mkdir(exist_ok=True)
    Path(f"{args.basedir}/in").mkdir(exist_ok=True)
    with open(f"{args.basedir}/in/in.txt", "wb") as seed: seed.write(b"\x00" * 10240)
    return {
        "AFL_BENCH_JUST_ONE": "1", "AFL_DISABLE_TRIM": "1", "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES": "1",
        "AFL_NO_UI": "1", "AFL_TRY_AFFINITY": "1", "PATH": str(Path("../").resolve()),
    }

async def compile_target(source: str, binary: str) -> None:
    (returncode, stdout, stderr) = await run_command(
        [Path("../afl-cc").resolve(), "-o", binary, source], env={"AFL_INSTRUMENT": "PCGUARD"}
    )
    if returncode != 0: sys.exit(red(f" [*] Error: afl-cc is unable to compile: {stderr} {stdout}"))

async def run_command(cmd: list, env: dict) -> (int | None, bytes, bytes):
    debug(f"Launching command: {cmd} with env {env}")
    p = await asyncio.create_subprocess_exec(
        *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=env
    )
    stdout, stderr = await p.communicate()
    debug(f"Output: {stdout.decode()} {stderr.decode()}")
    return (p.returncode, stdout, stderr)

async def colon_value_or_none(filename: str, searchKey: str) -> str | None:
    """Return a colon-separated value given a key in a file, e.g. 'cpu MHz : 4976.109'."""
    with open(filename, "r") as fh:
        kv_pairs = (line.split(": ", 1) for line in fh if ": " in line)
        return next((v.rstrip() for k, v in kv_pairs if k.rstrip() == searchKey), None)

async def save_benchmark_results() -> None:
    """Append a single row to the benchmark results in JSON Lines format (simple to write and to diff)."""
    with open("benchmark-results.jsonl", "a") as jsonfile:
        json.dump(results, jsonfile, sort_keys=True)
        jsonfile.write("\n")
        print(blue(f" [*] Results have been written to {jsonfile.name}"))

async def main() -> None:
    print(" [*] Preparing environment")
    # Remove stale files, if necessary.
    try:
        await clean_up_tempfiles()
    except FileNotFoundError:
        pass
    await check_deps()

    # Only record the first core's speed for now, even though it can vary between cores.
    results["hardware"]["cpu_mhz"] = float(await colon_value_or_none("/proc/cpuinfo", "cpu MHz"))
    results["hardware"]["cpu_model"] = await colon_value_or_none("/proc/cpuinfo", "model name")
    results["hardware"]["cpu_threads"] = cpu_count

    env_vars = await prep_env()
    print(f" [*] Ready, starting benchmark...")
    for target in targets:
        (source, binary) = [target.source, target.binary]
        await compile_target(source, binary)
        for mode in modes:
            execs_per_sec, execs_total, run_time_total = ([] for _ in range(3))
            for run in range(0, args.runs):
                print(gray(f" [*] {mode.name} {binary} run {run+1} of {args.runs}, execs/s: "), end="", flush=True)
                fuzzers = range(0, args.fuzzers if mode == Mode.multicore else 1)
                outdir = f"{args.basedir}/out-{mode.name}-{binary}"
                cmds = []
                for (idx, afl) in enumerate(fuzzers):
                    name = ["-o", outdir, "-M" if idx == 0 else "-S", str(afl)]
                    cmds.append(["afl-fuzz", "-i", f"{args.basedir}/in"] + name + ["-s", "123", "-D", f"./{binary}"])
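                # The first worker is launched as the main fuzzer (-M) and any others as secondaries (-S), all
                # syncing through the same -o output directory, as in a normal parallel afl-fuzz campaign.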

                # Prepare the afl-fuzz tasks, and then block here while waiting for them to finish.
                tasks = [run_command(cmds[cpu], env_vars) for cpu in fuzzers]
                start = time.time()
                await asyncio.gather(*tasks)
                end = time.time()

                # Our score is the sum of all execs_per_sec entries in fuzzer_stats files for the run.
                tasks = [colon_value_or_none(f"{outdir}/{afl}/fuzzer_stats", "execs_per_sec") for afl in fuzzers]
                all_execs_per_sec = await asyncio.gather(*tasks)
                execs = sum([Decimal(count) for count in all_execs_per_sec if count is not None])
                print(green(execs))
                execs_per_sec.append(execs)

                # Also gather execs_total and total_run_time for this run.
                tasks = [colon_value_or_none(f"{outdir}/{afl}/fuzzer_stats", "execs_done") for afl in fuzzers]
                all_execs_total = await asyncio.gather(*tasks)
                execs_total.append(sum([Decimal(count) for count in all_execs_total if count is not None]))
                run_time_total.append(Decimal(end - start))

            total_run_time = round(Decimal(sum(run_time_total)), 2)
            avg_score = round(Decimal(sum(execs_per_sec) / len(execs_per_sec)), 2)
            results["targets"][binary][mode.name] = {
                "execs_per_second": float(avg_score),
                "execs_total": int(sum([Decimal(execs) for execs in execs_total])),
                "fuzzers_used": len(fuzzers),
                "total_run_time": float(total_run_time),
            }
            print(f" [*] Average score for this test across all runs was: {green(avg_score)}")
            if (((max(execs_per_sec) - min(execs_per_sec)) / avg_score) * 100) > 15:
                print(yellow(" [*] The difference between your slowest and fastest runs was >15%, maybe try again?"))

    await clean_up_tempfiles()
    await save_benchmark_results()

if __name__ == "__main__":