benchmark: remove self-calculation of execs/sec

2025-06-16 03:48:08 +00:00 · 2023-11-19 14:08:40 -08:00
parent 43b8812c5c
commit 4d8df780ed
1 changed files with 23 additions and 36 deletions
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # Part of the aflplusplus project, requires Python 3.8+.
 # Author: Chris Ball <chris@printf.net>, ported from Marc "van Hauser" Heuse's "benchmark.sh".
-import argparse, asyncio, datetime, json, multiprocessing, os, platform, re, shutil, sys
+import argparse, asyncio, json, multiprocessing, os, platform, re, shutil, sys
 from dataclasses import asdict, dataclass
 from decimal import Decimal
 from enum import Enum, auto
@ -23,13 +23,9 @@ class Target:
@dataclass
 class Run:
-    afl_execs_per_sec: float
+    execs_per_sec: float
-    afl_execs_total: float
+    execs_total: float
    fuzzers_used: int
    run_end: str
    run_start: str
    total_execs_per_sec: float
    total_run_time: float
@dataclass
 class Config:
@ -68,7 +64,7 @@ env_vars = {
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 parser.add_argument("-b", "--basedir", help="directory to use for temp files", type=str, default="/tmp/aflpp-benchmark")
 parser.add_argument("-d", "--debug", help="show verbose debugging output", action="store_true")
-parser.add_argument("-r", "--runs", help="how many runs to average results over", type=int, default=3)
+parser.add_argument("-r", "--runs", help="how many runs to average results over", type=int, default=2)
 parser.add_argument("-f", "--fuzzers", help="how many afl-fuzz workers to use", type=int, default=cpu_count)
 parser.add_argument("-m", "--mode", help="pick modes", action="append", default=modes, choices=modes)
 parser.add_argument("-c", "--comment", help="add a comment about your setup", type=str, default="")
@ -150,8 +146,8 @@ async def check_deps() -> None:
    compiler = ""
    target_arch = ""
    for line in stderr.decode().split("\n"):
-        if m := re.match(r"(.* clang version .*?)\s", line):
+        if "clang version" in line:
-            compiler = m.group(1)
+            compiler = line
        elif m := re.match(r"^Target: (.*)", line):
            target_arch = m.group(1)
@ -200,15 +196,17 @@ async def save_benchmark_results() -> None:
            print(blue(f" [*] Results have not been written to the COMPARISON file; this CPU is already present."))
            return
        cpu_mhz = str(round(results.hardware.cpu_fastest_core_mhz)).ljust(5)
-        if "test-instr-persist-shmem" in results.targets and "multicore" in results.targets["test-instr-persist-shmem"]:
+        if not "test-instr-persist-shmem" in results.targets or \
-            if results.targets["test-instr-persist-shmem"]["singlecore"] is None or \
+           not "multicore" in results.targets["test-instr-persist-shmem"] or \
-               results.targets["test-instr-persist-shmem"]["multicore"] is None:
+           not "singlecore" in results.targets["test-instr-persist-shmem"] or \
-                return
+           results.targets["test-instr-persist-shmem"]["singlecore"] is None or \
-            single = str(round(results.targets["test-instr-persist-shmem"]["singlecore"].afl_execs_per_sec)).ljust(10)
+           results.targets["test-instr-persist-shmem"]["multicore"] is None:
-            multi = str(round(results.targets["test-instr-persist-shmem"]["multicore"].afl_execs_per_sec)).ljust(9)
+            return
-            cores = str(args.fuzzers).ljust(7)
+        single = str(round(results.targets["test-instr-persist-shmem"]["singlecore"].afl_execs_per_sec)).ljust(10)
-            comparisonfile.write(f"{cpu_model} | {cpu_mhz} | {cores} | {single} | {multi} | {aflconfig} |\n")
+        multi = str(round(results.targets["test-instr-persist-shmem"]["multicore"].afl_execs_per_sec)).ljust(9)
-            print(blue(f" [*] Results have been written to the COMPARISON file."))
+        cores = str(args.fuzzers).ljust(7)
        comparisonfile.write(f"{cpu_model} | {cpu_mhz} | {cores} | {single} | {multi} | {aflconfig} |\n")
        print(blue(f" [*] Results have been written to the COMPARISON file."))
    with open("COMPARISON", "r") as comparisonfile:
        print(comparisonfile.read())
@ -240,7 +238,7 @@ async def main() -> None:
            if mode == Mode.multicore:
                print(blue(f" [*] Using {args.fuzzers} fuzzers for multicore fuzzing "), end="")
                print(blue("(use --fuzzers to override)." if args.fuzzers == cpu_count else f"(the default is {cpu_count})"))
-            afl_execs_per_sec, execs_total, run_time_total = ([] for _ in range(3))
+            execs_per_sec, execs_total = ([] for _ in range(2))
            for run_idx in range(0, args.runs):
                print(gray(f" [*] {mode.name} {binary} run {run_idx+1} of {args.runs}, execs/s: "), end="", flush=True)
                fuzzers = range(0, args.fuzzers if mode == Mode.multicore else 1)
@ -249,41 +247,30 @@ async def main() -> None:
                for fuzzer_idx, afl in enumerate(fuzzers):
                    name = ["-o", outdir, "-M" if fuzzer_idx == 0 else "-S", str(afl)]
                    cmds.append(["afl-fuzz", "-i", f"{args.basedir}/in"] + name + ["-s", "123", "-V10", "-D", f"./{binary}"])
                # Prepare the afl-fuzz tasks, and then block while waiting for them to finish.
                fuzztasks = [run_command(cmds[cpu]) for cpu in fuzzers]
                start_time = datetime.datetime.now()
                await asyncio.gather(*fuzztasks)
                end_time = datetime.datetime.now()
                afl_versions = await colon_values(f"{outdir}/0/fuzzer_stats", "afl_version")
                if results.config:
                    results.config.afl_version = afl_versions[0]
                # Our score is the sum of all execs_per_sec entries in fuzzer_stats files for the run.
                sectasks = [colon_values(f"{outdir}/{afl}/fuzzer_stats", "execs_per_sec") for afl in fuzzers]
                all_execs_per_sec = await asyncio.gather(*sectasks)
                execs = sum([Decimal(count[0]) for count in all_execs_per_sec])
                print(green(execs))
-                afl_execs_per_sec.append(execs)
+                execs_per_sec.append(execs)
                # Also gather execs_total and total_run_time for this run.
                exectasks = [colon_values(f"{outdir}/{afl}/fuzzer_stats", "execs_done") for afl in fuzzers]
                all_execs_total = await asyncio.gather(*exectasks)
                execs_total.append(sum([Decimal(count[0]) for count in all_execs_total]))
                run_time_total.append((end_time - start_time).total_seconds())
            # (Using float() because Decimal() is not JSON-serializable.)
-            avg_afl_execs_per_sec = round(Decimal(sum(afl_execs_per_sec) / len(afl_execs_per_sec)), 2)
+            avg_afl_execs_per_sec = round(Decimal(sum(execs_per_sec) / len(execs_per_sec)), 2)
            afl_execs_total = int(sum([Decimal(execs) for execs in execs_total]))
-            total_run_time = float(round(Decimal(sum(run_time_total)), 2))
+            run = Run(execs_per_sec=float(avg_afl_execs_per_sec), execs_total=afl_execs_total, fuzzers_used=len(fuzzers))
            total_execs_per_sec = float(round(Decimal(afl_execs_total / total_run_time), 2))
            run = Run(afl_execs_per_sec=float(avg_afl_execs_per_sec), afl_execs_total=afl_execs_total,
                      fuzzers_used=len(fuzzers), run_end=str(end_time), run_start=str(start_time),
                      total_execs_per_sec=total_execs_per_sec, total_run_time=total_run_time)
            results.targets[binary][mode.name] = run
-
+            print(f" [*] Average execs/sec for this test across all runs was: {green(avg_afl_execs_per_sec)}")
-            print(f" [*] Average AFL execs/sec for this test across all runs was: {green(avg_afl_execs_per_sec)}")
+            if (((max(execs_per_sec) - min(execs_per_sec)) / avg_afl_execs_per_sec) * 100) > 15:
            if (((max(afl_execs_per_sec) - min(afl_execs_per_sec)) / avg_afl_execs_per_sec) * 100) > 15:
                print(yellow(" [*] The difference between your slowest and fastest runs was >15%, maybe try again?"))
    await clean_up_tempfiles()