enable long-running integration tests (#654)

2025-06-17 04:18:07 +00:00 · 2021-03-10 17:03:15 -05:00
parent f055e73b13
commit f6a426cc07
1 changed files with 403 additions and 319 deletions
--- a/src/integration-tests/integration-test.py
+++ b/src/integration-tests/integration-test.py
@ -7,13 +7,13 @@
 """ Launch multiple templates using samples to verify Onefuzz works end-to-end """
 # NOTE:
-# 1. This script uses pre-built fuzzing samples from the onefuzz-samples project.
+# 1. This script uses an unpacked version of the `integration-test-results`
-#    https://github.com/microsoft/onefuzz-samples/releases/latest
+#    from the CI pipeline.
 #
-# 2. This script will create new pools & managed scalesets during the testing by
+#    Check out https://github.com/microsoft/onefuzz/actions/workflows/
-#    default.  To use pre-existing pools, specify `--user_pools os=pool_name`
+#       ci.yml?query=branch%3Amain+is%3Asuccess
 #
-# 3. For each stage, this script launches everything for the stage in batch, then
+# 2. For each stage, this script launches everything for the stage in batch, then
 #    checks on each of the created items for the stage.  This batch processing
 #    allows testing multiple components concurrently.
@ -30,7 +30,7 @@ from onefuzz.api import Command, Onefuzz
 from onefuzz.backend import ContainerWrapper, wait
 from onefuzz.cli import execute_api
 from onefuzztypes.enums import OS, ContainerType, TaskState, VmState
-from onefuzztypes.models import Job, Pool, Repro, Scaleset
+from onefuzztypes.models import Job, Pool, Repro, Scaleset, Task
 from onefuzztypes.primitives import Container, Directory, File, PoolName, Region
 from pydantic import BaseModel, Field
@ -39,6 +39,13 @@ WINDOWS_POOL = "linux-test"
 BUILD = "0"
 class TaskTestState(Enum):
    """ Coarse task status used by the integration test when polling jobs """

    # Returned by `TestOnefuzz.check_task` when none of the cases below match.
    not_running = "not_running"
    # The task's state is `TaskState.running`.
    running = "running"
    # The task's state is `TaskState.stopped` or `TaskState.stopping`.
    stopped = "stopped"
    # The task reported an error, or a scaleset in the task's pool is in a
    # state outside `scaleset.state.available()`.
    failed = "failed"
 class TemplateType(Enum):
    libfuzzer = "libfuzzer"
    libfuzzer_dotnet = "libfuzzer_dotnet"
@ -54,7 +61,7 @@ class Integration(BaseModel):
    inputs: Optional[str]
    use_setup: bool = Field(default=False)
    nested_setup_dir: Optional[str]
-    wait_for_files: List[ContainerType]
+    wait_for_files: Dict[ContainerType, int]
    check_asan_log: Optional[bool] = Field(default=False)
    disable_check_debugger: Optional[bool] = Field(default=False)
    reboot_after_setup: Optional[bool] = Field(default=False)
@ -67,14 +74,18 @@ TARGETS: Dict[str, Integration] = {
        os=OS.linux,
        target_exe="fuzz.exe",
        inputs="seeds",
-        wait_for_files=[ContainerType.unique_reports],
+        wait_for_files={ContainerType.unique_reports: 1},
    ),
    "linux-libfuzzer": Integration(
        template=TemplateType.libfuzzer,
        os=OS.linux,
        target_exe="fuzz.exe",
        inputs="seeds",
-        wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
+        wait_for_files={
            ContainerType.unique_reports: 1,
            ContainerType.coverage: 1,
            ContainerType.inputs: 2,
        },
        reboot_after_setup=True,
    ),
    "linux-libfuzzer-dotnet": Integration(
@ -84,7 +95,8 @@ TARGETS: Dict[str, Integration] = {
        nested_setup_dir="my-fuzzer",
        inputs="inputs",
        use_setup=True,
-        wait_for_files=[ContainerType.inputs, ContainerType.crashes],
+        wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
        test_repro=False,
    ),
    "linux-libfuzzer-aarch64-crosscompile": Integration(
        template=TemplateType.libfuzzer_qemu_user,
@ -92,28 +104,28 @@ TARGETS: Dict[str, Integration] = {
        target_exe="fuzz.exe",
        inputs="inputs",
        use_setup=True,
-        wait_for_files=[ContainerType.inputs, ContainerType.crashes],
+        wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
        test_repro=False,
    ),
    "linux-libfuzzer-rust": Integration(
        template=TemplateType.libfuzzer,
        os=OS.linux,
        target_exe="fuzz_target_1",
-        wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
+        wait_for_files={ContainerType.unique_reports: 1, ContainerType.coverage: 1},
    ),
    "linux-trivial-crash": Integration(
        template=TemplateType.radamsa,
        os=OS.linux,
        target_exe="fuzz.exe",
        inputs="seeds",
-        wait_for_files=[ContainerType.unique_reports],
+        wait_for_files={ContainerType.unique_reports: 1},
    ),
    "linux-trivial-crash-asan": Integration(
        template=TemplateType.radamsa,
        os=OS.linux,
        target_exe="fuzz.exe",
        inputs="seeds",
-        wait_for_files=[ContainerType.unique_reports],
+        wait_for_files={ContainerType.unique_reports: 1},
        check_asan_log=True,
        disable_check_debugger=True,
    ),
@ -122,89 +134,53 @@ TARGETS: Dict[str, Integration] = {
        os=OS.windows,
        target_exe="fuzz.exe",
        inputs="seeds",
-        wait_for_files=[
+        wait_for_files={
-            ContainerType.unique_reports,
+            ContainerType.inputs: 2,
-            ContainerType.coverage,
+            ContainerType.unique_reports: 1,
-        ],
+            ContainerType.coverage: 1,
        },
    ),
    "windows-trivial-crash": Integration(
        template=TemplateType.radamsa,
        os=OS.windows,
        target_exe="fuzz.exe",
        inputs="seeds",
-        wait_for_files=[ContainerType.unique_reports],
+        wait_for_files={ContainerType.unique_reports: 1},
    ),
 }
 class TestOnefuzz:
-    def __init__(
+    def __init__(self, onefuzz: Onefuzz, logger: logging.Logger, test_id: UUID) -> None:
        self,
        onefuzz: Onefuzz,
        logger: logging.Logger,
        *,
        pool_size: int,
        os_list: List[OS],
        targets: List[str],
        skip_cleanup: bool,
    ) -> None:
        self.of = onefuzz
        self.logger = logger
        self.pools: Dict[OS, Pool] = {}
-        self.project = "test-" + str(uuid4()).split("-")[0]
+        self.test_id = test_id
-        self.pool_size = pool_size
+        self.project = f"test-{self.test_id}"
        self.os = os_list
        self.targets = targets
        self.skip_cleanup = skip_cleanup
        # job_id -> Job
        self.jobs: Dict[UUID, Job] = {}
        # job_id -> List[container_url]
        self.containers: Dict[UUID, List[ContainerWrapper]] = {}
        # task_id -> job_id
        self.tasks: Dict[UUID, UUID] = {}
        self.job_os: Dict[UUID, OS] = {}
        self.successful_jobs: Set[UUID] = set()
        self.failed_jobs: Set[UUID] = set()
        self.failed_repro: Set[UUID] = set()
        # job_id -> Repro
        self.repros: Dict[UUID, Repro] = {}
        # job_id -> target
        self.target_jobs: Dict[UUID, str] = {}
    def setup(
        self,
        *,
        region: Optional[Region] = None,
-        user_pools: Optional[Dict[str, str]] = None,
+        pool_size: int,
        os_list: List[OS],
    ) -> None:
-        for entry in self.os:
+        for entry in os_list:
-            if user_pools and entry.name in user_pools:
+            name = PoolName(f"testpool-{entry.name}-{self.test_id}")
                self.logger.info(
                    "using existing pool: %s:%s", entry.name, user_pools[entry.name]
                )
                self.pools[entry] = self.of.pools.get(user_pools[entry.name])
            else:
                name = PoolName("pool-%s-%s" % (self.project, entry.name))
            self.logger.info("creating pool: %s:%s", entry.name, name)
            self.pools[entry] = self.of.pools.create(name, entry)
            self.logger.info("creating scaleset for pool: %s", name)
-                self.of.scalesets.create(name, self.pool_size, region=region)
+            self.of.scalesets.create(name, pool_size, region=region)
-    def launch(self, path: str) -> None:
+    def launch(
        self, path: Directory, *, os_list: List[OS], targets: List[str], duration=int
    ) -> None:
        """ Launch all of the fuzzing templates """
        for target, config in TARGETS.items():
-            if target not in self.targets:
+            if target not in targets:
                continue
-            if config.os not in self.os:
+            if config.os not in os_list:
                continue
            self.logger.info("launching: %s", target)
@ -230,7 +206,7 @@ class TestOnefuzz:
                    target_exe=target_exe,
                    inputs=inputs,
                    setup_dir=setup,
-                    duration=1,
+                    duration=duration,
                    vm_count=1,
                    reboot_after_setup=config.reboot_after_setup or False,
                )
@ -245,7 +221,7 @@ class TestOnefuzz:
                    target_harness=config.target_exe,
                    inputs=inputs,
                    setup_dir=setup,
-                    duration=1,
+                    duration=duration,
                    vm_count=1,
                )
            elif config.template == TemplateType.libfuzzer_qemu_user:
@ -256,7 +232,7 @@ class TestOnefuzz:
                    self.pools[config.os].name,
                    inputs=inputs,
                    target_exe=target_exe,
-                    duration=1,
+                    duration=duration,
                    vm_count=1,
                )
            elif config.template == TemplateType.radamsa:
@ -270,7 +246,7 @@ class TestOnefuzz:
                    setup_dir=setup,
                    check_asan_log=config.check_asan_log or False,
                    disable_check_debugger=config.disable_check_debugger or False,
-                    duration=1,
+                    duration=duration,
                    vm_count=1,
                )
            elif config.template == TemplateType.afl:
@ -282,7 +258,7 @@ class TestOnefuzz:
                    target_exe=target_exe,
                    inputs=inputs,
                    setup_dir=setup,
-                    duration=1,
+                    duration=duration,
                    vm_count=1,
                )
            else:
@ -291,21 +267,9 @@ class TestOnefuzz:
            if not job:
                raise Exception("missing job")
-            self.containers[job.job_id] = []
+    def check_task(
-            for task in self.of.tasks.list(job_id=job.job_id):
+        self, job: Job, task: Task, scalesets: List[Scaleset]
-                self.tasks[task.task_id] = job.job_id
+    ) -> TaskTestState:
                self.containers[job.job_id] += [
                    ContainerWrapper(self.of.containers.get(x.name).sas_url)
                    for x in task.config.containers
                    if x.type in TARGETS[job.config.name].wait_for_files
                ]
            self.jobs[job.job_id] = job
            self.job_os[job.job_id] = config.os
            self.target_jobs[job.job_id] = target
    def check_task(self, task_id: UUID, scalesets: List[Scaleset]) -> Optional[str]:
        task = self.of.tasks.get(task_id)
        # Check if the scaleset the task is assigned is OK
        for scaleset in scalesets:
            if (
@ -313,154 +277,231 @@ class TestOnefuzz:
                and scaleset.pool_name == task.config.pool.pool_name
                and scaleset.state not in scaleset.state.available()
            ):
-                return "task scaleset failed: %s - %s - %s (%s)" % (
+                self.logger.error(
-                    self.jobs[self.tasks[task_id]].config.name,
+                    "task scaleset failed: %s - %s - %s (%s)",
                    job.config.name,
                    task.config.task.type.name,
                    scaleset.state.name,
                    scaleset.error,
                )
                return TaskTestState.failed
        task = self.of.tasks.get(task.task_id)
        # check if the task itself has an error
        if task.error is not None:
-            return "task failed: %s - %s - %s (%s)" % (
+            self.logger.error(
-                task_id,
+                "task failed: %s - %s (%s)",
-                self.jobs[self.tasks[task_id]].config.name,
+                job.config.name,
                task.config.task.type.name,
                task.error,
            )
            return TaskTestState.failed
-        # just in case someone else stopped the task
+        if task.state in [TaskState.stopped, TaskState.stopping]:
-        if task.state in TaskState.shutting_down():
+            return TaskTestState.stopped
-            return "task shutdown early: %s - %s" % (
+
-                self.jobs[self.tasks[task_id]].config.name,
+        if task.state == TaskState.running:
-                task.config.task.type.name,
+            return TaskTestState.running
        return TaskTestState.not_running
    def check_jobs(
        self, poll: bool = False, stop_on_complete_check: bool = False
    ) -> bool:
        """ Check all of the integration jobs """
        jobs: Dict[UUID, Job] = {x.job_id: x for x in self.get_jobs()}
        job_tasks: Dict[UUID, List[Task]] = {}
        check_containers: Dict[UUID, Dict[Container, Tuple[ContainerWrapper, int]]] = {}
        for job in jobs.values():
            if job.config.name not in TARGETS:
                self.logger.error("unknown job target: %s", job.config.name)
                continue
            tasks = self.of.jobs.tasks.list(job.job_id)
            job_tasks[job.job_id] = tasks
            check_containers[job.job_id] = {}
            for task in tasks:
                for container in task.config.containers:
                    if container.type in TARGETS[job.config.name].wait_for_files:
                        count = TARGETS[job.config.name].wait_for_files[container.type]
                        check_containers[job.job_id][container.name] = (
                            ContainerWrapper(
                                self.of.containers.get(container.name).sas_url
                            ),
                            count,
                        )
        return None
-    def check_jobs_impl(
+        self.success = True
-        self,
+        self.logger.info("checking %d jobs", len(jobs))
-    ) -> Tuple[bool, str, bool]:
+
        self.cleared = False
        def clear() -> None:
            if not self.cleared:
                self.cleared = True
                if poll:
                    print("")
-        if self.jobs:
+        def check_jobs_impl() -> Tuple[bool, str, bool]:
-            finished_job: Set[UUID] = set()
+            self.cleared = False
            failed_jobs: Set[UUID] = set()
            job_task_states: Dict[UUID, Set[TaskTestState]] = {}
-            # check all the containers we care about for the job
+            for job_id in check_containers:
-            for job_id in self.containers:
+                finished_containers: Set[Container] = set()
-                done: Set[ContainerWrapper] = set()
+                for (container_name, container_impl) in check_containers[
-                for container in self.containers[job_id]:
+                    job_id
-                    if len(container.list_blobs()) > 0:
+                ].items():
                    container_client, count = container_impl
                    if len(container_client.list_blobs()) >= count:
                        clear()
                        self.logger.info(
-                            "new files in: %s", container.client.container_name
+                            "found files for %s - %s",
                            jobs[job_id].config.name,
                            container_name,
                        )
-                        done.add(container)
+                        finished_containers.add(container_name)
-                for container in done:
+
-                    self.containers[job_id].remove(container)
+                for container_name in finished_containers:
-                if not self.containers[job_id]:
+                    del check_containers[job_id][container_name]
                    clear()
                    self.logger.info("finished: %s", self.jobs[job_id].config.name)
                    finished_job.add(job_id)
            # check all the tasks associated with the job
            if self.tasks:
            scalesets = self.of.scalesets.list()
-                for task_id in self.tasks:
+            for job_id in job_tasks:
-                    error = self.check_task(task_id, scalesets)
+                finished_tasks: Set[UUID] = set()
-                    if error is not None:
+                job_task_states[job_id] = set()
                for task in job_tasks[job_id]:
                    if job_id not in jobs:
                        continue
                    task_result = self.check_task(jobs[job_id], task, scalesets)
                    if task_result == TaskTestState.failed:
                        self.success = False
                        failed_jobs.add(job_id)
                    elif task_result == TaskTestState.stopped:
                        finished_tasks.add(task.task_id)
                    else:
                        job_task_states[job_id].add(task_result)
                job_tasks[job_id] = [
                    x for x in job_tasks[job_id] if x.task_id not in finished_tasks
                ]
            to_remove: Set[UUID] = set()
            for job in jobs.values():
                # stop tracking failed jobs
                if job.job_id in failed_jobs:
                    if job.job_id in check_containers:
                        del check_containers[job.job_id]
                    if job.job_id in job_tasks:
                        del job_tasks[job.job_id]
                    continue
                # stop checking containers once all the containers for the job
                # have checked out.
                if job.job_id in check_containers:
                    if not check_containers[job.job_id]:
                        clear()
-                        self.logger.error(error)
+                        self.logger.info(
-                        finished_job.add(self.tasks[task_id])
+                            "found files in all containers for %s", job.config.name
                        self.failed_jobs.add(self.tasks[task_id])
            # cleanup jobs that are done testing
            for job_id in finished_job:
                self.stop_template(
                    self.jobs[job_id].config.name, delete_containers=False
                        )
                        del check_containers[job.job_id]
-                for task_id, task_job_id in list(self.tasks.items()):
+                if job.job_id not in check_containers:
-                    if job_id == task_job_id:
+                    if job.job_id in job_task_states:
-                        del self.tasks[task_id]
+                        if set([TaskTestState.running]).issuperset(
                            job_task_states[job.job_id]
                        ):
                            del job_tasks[job.job_id]
-                if job_id in self.jobs:
+                if job.job_id not in job_tasks and job.job_id not in check_containers:
-                    self.successful_jobs.add(job_id)
+                    clear()
-                    del self.jobs[job_id]
+                    self.logger.info("%s completed", job.config.name)
                    to_remove.add(job.job_id)
-                if job_id in self.containers:
+            for job_id in to_remove:
-                    del self.containers[job_id]
+                if stop_on_complete_check:
                    self.stop_job(jobs[job_id])
                del jobs[job_id]
            msg = "waiting on: %s" % ",".join(
-            sorted(x.config.name for x in self.jobs.values())
+                sorted(x.config.name for x in jobs.values())
            )
-        if len(msg) > 80:
+            if poll and len(msg) > 80:
-            msg = "waiting on %d jobs" % len(self.jobs)
+                msg = "waiting on %d jobs" % len(jobs)
-        return (
+            if not jobs:
-            not bool(self.jobs),
+                msg = "done all tasks"
            msg,
            not bool(self.failed_jobs),
        )
-    def check_jobs(self) -> bool:
+            return (not bool(jobs), msg, self.success)
        """ Check all of the integration jobs """
        self.logger.info("checking jobs")
        return wait(self.check_jobs_impl)
-    def get_job_crash(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
+        if poll:
-        # get the crash container for a given job
+            return wait(check_jobs_impl)
        else:
            _, msg, result = check_jobs_impl()
            self.logger.info(msg)
            return result
    def get_job_crash_report(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
        for task in self.of.tasks.list(job_id=job_id, state=None):
            for container in task.config.containers:
-                if container.type != ContainerType.unique_reports:
+                if container.type not in [
                    ContainerType.unique_reports,
                    ContainerType.reports,
                ]:
                    continue
                files = self.of.containers.files.list(container.name)
                if len(files.files) > 0:
                    return (container.name, files.files[0])
        return None
-    def launch_repro(self) -> None:
+    def launch_repro(self) -> Tuple[bool, Dict[UUID, Tuple[Job, Repro]]]:
        # launch repro for one report from all successful jobs
        has_cdb = bool(which("cdb.exe"))
        has_gdb = bool(which("gdb"))
-        for job_id in self.successful_jobs:
+
-            if not TARGETS[self.target_jobs[job_id]].test_repro:
+        jobs = self.get_jobs()
-                self.logger.info("skipping repro for %s", self.target_jobs[job_id])
+
        result = True
        repros = {}
        for job in jobs:
            if not TARGETS[job.config.name].test_repro:
                self.logger.info("not testing repro for %s", job.config.name)
                continue
-            if self.job_os[job_id] == OS.linux and not has_gdb:
+            if TARGETS[job.config.name].os == OS.linux and not has_gdb:
                self.logger.warning(
-                    "missing gdb in path, not launching repro: %s",
+                    "skipping repro for %s, missing gdb", job.config.name
                    self.target_jobs[job_id],
                )
                continue
-            if self.job_os[job_id] == OS.windows and not has_cdb:
+            if TARGETS[job.config.name].os == OS.windows and not has_cdb:
                self.logger.warning(
-                    "missing cdb in path, not launching repro: %s",
+                    "skipping repro for %s, missing cdb", job.config.name
                    self.target_jobs[job_id],
                )
                continue
-            self.logger.info("launching repro: %s", self.target_jobs[job_id])
+            report = self.get_job_crash_report(job.job_id)
            report = self.get_job_crash(job_id)
            if report is None:
-                self.logger.warning(
+                self.logger.error(
-                    "target does not include crash reports: %s",
+                    "target does not include crash reports: %s", job.config.name
                    self.target_jobs[job_id],
                )
-                return
+                result = False
            else:
                self.logger.info("launching repro: %s", job.config.name)
                (container, path) = report
-            self.repros[job_id] = self.of.repro.create(container, path, duration=1)
+                repro = self.of.repro.create(container, path, duration=1)
                repros[job.job_id] = (job, repro)
-    def check_repro_impl(
+        return (result, repros)
-        self,
+
-    ) -> Tuple[bool, str, bool]:
+    def check_repro(self, repros: Dict[UUID, Tuple[Job, Repro]]) -> bool:
        self.logger.info("checking repros")
        self.success = True
        def check_repro_impl() -> Tuple[bool, str, bool]:
            # check all of the launched repros
            self.cleared = False
@ -475,20 +516,20 @@ class TestOnefuzz:
                OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
            }
-        info: Dict[str, List[str]] = {}
+            for (job, repro) in list(repros.values()):
                repros[job.job_id] = (job, self.of.repro.get(repro.vm_id))
-        done: Set[UUID] = set()
+            for (job, repro) in list(repros.values()):
        for job_id, repro in self.repros.items():
            repro = self.of.repro.get(repro.vm_id)
                if repro.error:
                    clear()
                    self.logger.error(
-                    "repro failed: %s: %s", self.target_jobs[job_id], repro.error
+                        "repro failed: %s: %s",
                        job.config.name,
                        repro.error,
                    )
-                self.failed_jobs.add(job_id)
+                    self.of.repro.delete(repro.vm_id)
-                done.add(job_id)
+                    del repros[job.job_id]
-            elif repro.state not in [VmState.init, VmState.extensions_launch]:
+                elif repro.state == VmState.running:
                done.add(job_id)
                    try:
                        result = self.of.repro.connect(
                            repro.vm_id,
@ -499,93 +540,72 @@ class TestOnefuzz:
                            commands[repro.os][1], result, re.MULTILINE
                        ):
                            clear()
-                        self.logger.info(
+                            self.logger.info("repro succeeded: %s", job.config.name)
                            "repro succeeded: %s", self.target_jobs[job_id]
                        )
                        self.failed_jobs.add(job_id)
                        done.add(job_id)
                        else:
                            clear()
                            self.logger.error(
-                            "repro failed: %s: %s", self.target_jobs[job_id], result
+                                "repro failed: %s - %s", job.config.name, result
                            )
-                        self.failed_jobs.add(job_id)
+                    except Exception as err:
                        done.add(job_id)
                except Exception as e:
                        clear()
                        self.logger.error("repro failed: %s - %s", job.config.name, err)
                    del repros[job.job_id]
                elif repro.state not in [VmState.init, VmState.extensions_launch]:
                    self.logger.error(
-                        "repro failed: %s: %s", self.target_jobs[job_id], repr(e)
+                        "repro failed: %s - bad state: %s", job.config.name, repro.state
                    )
-                    self.failed_jobs.add(job_id)
+                    del repros[job.job_id]
                    done.add(job_id)
            else:
                if repro.state.name not in info:
                    info[repro.state.name] = []
                info[repro.state.name].append(self.target_jobs[job_id])
-        for job_id in done:
+            repro_states: Dict[str, List[str]] = {}
-            self.of.repro.delete(self.repros[job_id].vm_id)
+            for (job, repro) in repros.values():
-            del self.repros[job_id]
+                if repro.state.name not in repro_states:
                    repro_states[repro.state.name] = []
                repro_states[repro.state.name].append(job.config.name)
            logline = []
-        for name in info:
+            for state in repro_states:
-            logline.append("%s:%s" % (name, ",".join(info[name])))
+                logline.append("%s:%s" % (state, ",".join(repro_states[state])))
            msg = "waiting repro: %s" % " ".join(logline)
-        if len(logline) > 80:
+            if len(msg) > 80:
-            msg = "waiting on %d repros" % len(self.repros)
+                msg = "waiting on %d repros" % len(repros)
            return (not bool(repros), msg, self.success)
-        return (
+        return wait(check_repro_impl)
            not bool(self.repros),
            msg,
            bool(self.failed_jobs),
        )
-    def check_repro(self) -> bool:
+    def get_jobs(self) -> List[Job]:
-        self.logger.info("checking repros")
+        jobs = self.of.jobs.list(job_state=None)
-        return wait(self.check_repro_impl)
+        jobs = [x for x in jobs if x.config.project == self.project]
        return jobs
-    def stop_template(self, target: str, delete_containers: bool = True) -> None:
+    def stop_job(self, job: Job, delete_containers: bool = False) -> None:
        """ stop a specific template """
        if self.skip_cleanup:
            self.logger.warning("not cleaning up target: %s", target)
        else:
        self.of.template.stop(
-                self.project,
+            job.config.project,
-                target,
+            job.config.name,
            BUILD,
            delete_containers=delete_containers,
                stop_notifications=True,
        )
-    def cleanup(self, *, user_pools: Optional[Dict[str, str]] = None) -> bool:
+    def get_pools(self) -> List[Pool]:
-        """ cleanup all of the integration pools & jobs """
+        pools = self.of.pools.list()
        pools = [x for x in pools if x.name == f"testpool-{x.os.name}-{self.test_id}"]
        return pools
-        if self.skip_cleanup:
+    def cleanup(self) -> None:
-            self.logger.warning("not cleaning up")
+        """ cleanup all of the integration pools & jobs """
            return True
        self.logger.info("cleaning up")
        errors: List[Exception] = []
-        for target, config in TARGETS.items():
+        jobs = self.get_jobs()
-            if config.os not in self.os:
+        for job in jobs:
                continue
            if target not in self.targets:
                continue
            try:
-                self.logger.info("stopping %s", target)
+                self.stop_job(job, delete_containers=True)
                self.stop_template(target, delete_containers=False)
            except Exception as e:
-                self.logger.error("cleanup of %s failed", target)
+                self.logger.error("cleanup of job failed: %s - %s", job, e)
                errors.append(e)
-        for pool in self.pools.values():
+        for pool in self.get_pools():
            if user_pools and pool.name in user_pools.values():
                continue
            self.logger.info(
                "halting: %s:%s:%s", pool.name, pool.os.name, pool.arch.name
            )
@ -595,52 +615,115 @@ class TestOnefuzz:
                self.logger.error("cleanup of pool failed: %s - %s", pool.name, e)
                errors.append(e)
-        for repro in self.repros.values():
+        container_names = set()
        for job in jobs:
            for task in self.of.tasks.list(job_id=job.job_id, state=None):
                for container in task.config.containers:
                    if container.type in [
                        ContainerType.reports,
                        ContainerType.unique_reports,
                    ]:
                        container_names.add(container.name)
        for repro in self.of.repro.list():
            if repro.config.container in container_names:
                try:
                    self.of.repro.delete(repro.vm_id)
                except Exception as e:
-                self.logger.error("cleanup of repro failed: %s - %s", repro.vm_id, e)
+                    self.logger.error("cleanup of repro failed: %s %s", repro.vm_id, e)
                    errors.append(e)
-        return not bool(errors)
+        if errors:
            raise Exception("cleanup failed")
 class Run(Command):
    def check_jobs(
        self,
        test_id: UUID,
        *,
        endpoint: Optional[str],
        poll: bool = False,
        stop_on_complete_check: bool = False,
    ) -> None:
        """ check the jobs of test run `test_id`; raises if the check fails """
        self.onefuzz.__setup__(endpoint=endpoint)
        harness = TestOnefuzz(self.onefuzz, self.logger, test_id)
        jobs_ok = harness.check_jobs(
            poll=poll, stop_on_complete_check=stop_on_complete_check
        )
        if jobs_ok:
            return
        # A falsy result from the harness is surfaced as an exception.
        raise Exception("jobs failed")
    def check_repros(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
        """ launch and check repros for test run `test_id`; raises on failure """
        self.onefuzz.__setup__(endpoint=endpoint)
        harness = TestOnefuzz(self.onefuzz, self.logger, test_id)
        launched_ok, repros = harness.launch_repro()
        # The check runs on whatever was launched, even if launching itself
        # reported a failure; either failure fails the whole step.
        checked_ok = harness.check_repro(repros)
        if not checked_ok or not launched_ok:
            raise Exception("repros failed")
    def launch(
        self,
        samples: Directory,
        *,
        endpoint: Optional[str] = None,
        pool_size: int = 10,
        region: Optional[Region] = None,
        os_list: List[OS] = [OS.linux, OS.windows],
        targets: List[str] = list(TARGETS.keys()),
        test_id: Optional[UUID] = None,
        duration: int = 1,
    ) -> UUID:
        """ set up a test run and launch its targets; returns the run's test id """
        # A fresh id is generated only when the caller did not supply one.
        run_id = uuid4() if test_id is None else test_id
        self.logger.info("launching test_id: %s", run_id)

        self.onefuzz.__setup__(endpoint=endpoint)
        harness = TestOnefuzz(self.onefuzz, self.logger, run_id)
        # NOTE: the list-valued defaults are only passed through below; this
        # method never mutates them.
        harness.setup(region=region, pool_size=pool_size, os_list=os_list)
        harness.launch(samples, os_list=os_list, targets=targets, duration=duration)
        return run_id
    def cleanup(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
        """ run the test harness cleanup for test run `test_id` """
        self.onefuzz.__setup__(endpoint=endpoint)
        # The harness is only needed for this one call, so it is not kept.
        TestOnefuzz(self.onefuzz, self.logger, test_id=test_id).cleanup()
    def test(
        self,
        samples: Directory,
        *,
        endpoint: Optional[str] = None,
-        user_pools: Optional[Dict[str, str]] = None,
+        pool_size: int = 15,
        pool_size: int = 10,
        region: Optional[Region] = None,
        os_list: List[OS] = [OS.linux, OS.windows],
        targets: List[str] = list(TARGETS.keys()),
        skip_repro: bool = False,
-        skip_cleanup: bool = False,
+        duration: int = 1,
    ) -> None:
        self.onefuzz.__setup__(endpoint=endpoint)
        tester = TestOnefuzz(
            self.onefuzz,
            self.logger,
            pool_size=pool_size,
            os_list=os_list,
            targets=targets,
            skip_cleanup=skip_cleanup,
        )
        success = True
        test_id = uuid4()
        error: Optional[Exception] = None
        try:
-            tester.setup(region=region, user_pools=user_pools)
+            self.launch(
-            tester.launch(samples)
+                samples,
-            tester.check_jobs()
+                endpoint=endpoint,
                pool_size=pool_size,
                region=region,
                os_list=os_list,
                targets=targets,
                test_id=test_id,
                duration=duration,
            )
            self.check_jobs(
                test_id, endpoint=endpoint, poll=True, stop_on_complete_check=True
            )
            if skip_repro:
                self.logger.warning("not testing crash repro")
            else:
-                self.logger.info("launching crash repro tests")
+                self.check_repros(test_id, endpoint=endpoint)
                tester.launch_repro()
                tester.check_repro()
        except Exception as e:
            self.logger.error("testing failed: %s", repr(e))
            error = e
@ -649,10 +732,11 @@ class Run(Command):
            self.logger.error("interrupted testing")
            success = False
-        if not tester.cleanup(user_pools=user_pools):
+        try:
-            success = False
+            self.cleanup(test_id, endpoint=endpoint)
-
+        except Exception as e:
-        if tester.failed_jobs or tester.failed_repro:
+            self.logger.error("testing failed: %s", repr(e))
            error = e
            success = False
        if error: