enable long-running integration tests (#654)

Author: bmc-msft
Date:   2021-03-10 17:03:15 -05:00
Committed-by: GitHub
Parent: f055e73b13
Commit: f6a426cc07

@@ -7,13 +7,13 @@
""" Launch multiple templates using samples to verify Onefuzz works end-to-end """
# NOTE:
# 1. This script uses pre-built fuzzing samples from the onefuzz-samples project.
# https://github.com/microsoft/onefuzz-samples/releases/latest
# 1. This script uses an unpacked version of the `integration-test-results`
# from the CI pipeline.
#
# 2. This script will create new pools & managed scalesets during the testing by
# default. To use pre-existing pools, specify `--user_pools os=pool_name`
# Check out https://github.com/microsoft/onefuzz/actions/workflows/
# ci.yml?query=branch%3Amain+is%3Asuccess
#
# 3. For each stage, this script launches everything for the stage in batch, then
# 2. For each stage, this script launches everything for the stage in batch, then
# checks on each of the created items for the stage. This batch processing
# allows testing multiple components concurrently.
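#
# The stages below are keyed by a shared test_id, so each stage can run as a
# separate CI step. A hypothetical stage-by-stage driver, mirroring Run.test()
# further down (the Run constructor shape and the `samples` directory are
# assumptions here; the actual CLI is generated from these methods by
# onefuzz.cli.execute_api):
#
#     run = Run(onefuzz, logger)                 # constructor shape assumed
#     test_id = run.launch(samples, duration=2)  # mints and returns a test_id
#     run.check_jobs(test_id, endpoint=None, poll=True, stop_on_complete_check=True)
#     run.check_repros(test_id, endpoint=None)
#     run.cleanup(test_id, endpoint=None)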
@@ -30,7 +30,7 @@ from onefuzz.api import Command, Onefuzz
from onefuzz.backend import ContainerWrapper, wait
from onefuzz.cli import execute_api
from onefuzztypes.enums import OS, ContainerType, TaskState, VmState
from onefuzztypes.models import Job, Pool, Repro, Scaleset
from onefuzztypes.models import Job, Pool, Repro, Scaleset, Task
from onefuzztypes.primitives import Container, Directory, File, PoolName, Region
from pydantic import BaseModel, Field
@@ -39,6 +39,13 @@ WINDOWS_POOL = "linux-test"
BUILD = "0"
class TaskTestState(Enum):
not_running = "not_running"
running = "running"
stopped = "stopped"
failed = "failed"
class TemplateType(Enum):
libfuzzer = "libfuzzer"
libfuzzer_dotnet = "libfuzzer_dotnet"
@@ -54,7 +61,7 @@ class Integration(BaseModel):
inputs: Optional[str]
use_setup: bool = Field(default=False)
nested_setup_dir: Optional[str]
wait_for_files: List[ContainerType]
wait_for_files: Dict[ContainerType, int]
check_asan_log: Optional[bool] = Field(default=False)
disable_check_debugger: Optional[bool] = Field(default=False)
reboot_after_setup: Optional[bool] = Field(default=False)
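wait_for_files now maps each container type to a minimum blob count instead of merely listing the types. A minimal sketch of the completion test this enables, matching the >= comparison used in check_jobs below:

    def container_done(container: ContainerWrapper, required: int) -> bool:
        # a container checks out once it holds at least `required` blobs
        return len(container.list_blobs()) >= required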
@@ -67,14 +74,18 @@ TARGETS: Dict[str, Integration] = {
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
),
"linux-libfuzzer": Integration(
template=TemplateType.libfuzzer,
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
wait_for_files={
ContainerType.unique_reports: 1,
ContainerType.coverage: 1,
ContainerType.inputs: 2,
},
reboot_after_setup=True,
),
"linux-libfuzzer-dotnet": Integration(
@@ -84,7 +95,8 @@ TARGETS: Dict[str, Integration] = {
nested_setup_dir="my-fuzzer",
inputs="inputs",
use_setup=True,
wait_for_files=[ContainerType.inputs, ContainerType.crashes],
wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
test_repro=False,
),
"linux-libfuzzer-aarch64-crosscompile": Integration(
template=TemplateType.libfuzzer_qemu_user,
@@ -92,28 +104,28 @@ TARGETS: Dict[str, Integration] = {
target_exe="fuzz.exe",
inputs="inputs",
use_setup=True,
wait_for_files=[ContainerType.inputs, ContainerType.crashes],
wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
test_repro=False,
),
"linux-libfuzzer-rust": Integration(
template=TemplateType.libfuzzer,
os=OS.linux,
target_exe="fuzz_target_1",
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
wait_for_files={ContainerType.unique_reports: 1, ContainerType.coverage: 1},
),
"linux-trivial-crash": Integration(
template=TemplateType.radamsa,
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
),
"linux-trivial-crash-asan": Integration(
template=TemplateType.radamsa,
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
check_asan_log=True,
disable_check_debugger=True,
),
@@ -122,89 +134,53 @@ TARGETS: Dict[str, Integration] = {
os=OS.windows,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[
ContainerType.unique_reports,
ContainerType.coverage,
],
wait_for_files={
ContainerType.inputs: 2,
ContainerType.unique_reports: 1,
ContainerType.coverage: 1,
},
),
"windows-trivial-crash": Integration(
template=TemplateType.radamsa,
os=OS.windows,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
),
}
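Under the count-based schema, registering another target is a single dict entry. A hypothetical example (the target name and counts are illustrative only, not part of this change):

    "linux-libfuzzer-example": Integration(
        template=TemplateType.libfuzzer,
        os=OS.linux,
        target_exe="fuzz.exe",
        inputs="seeds",
        wait_for_files={ContainerType.unique_reports: 1, ContainerType.inputs: 2},
    ),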
class TestOnefuzz:
def __init__(
self,
onefuzz: Onefuzz,
logger: logging.Logger,
*,
pool_size: int,
os_list: List[OS],
targets: List[str],
skip_cleanup: bool,
) -> None:
def __init__(self, onefuzz: Onefuzz, logger: logging.Logger, test_id: UUID) -> None:
self.of = onefuzz
self.logger = logger
self.pools: Dict[OS, Pool] = {}
self.project = "test-" + str(uuid4()).split("-")[0]
self.pool_size = pool_size
self.os = os_list
self.targets = targets
self.skip_cleanup = skip_cleanup
# job_id -> Job
self.jobs: Dict[UUID, Job] = {}
# job_id -> List[container_url]
self.containers: Dict[UUID, List[ContainerWrapper]] = {}
# task_id -> job_id
self.tasks: Dict[UUID, UUID] = {}
self.job_os: Dict[UUID, OS] = {}
self.successful_jobs: Set[UUID] = set()
self.failed_jobs: Set[UUID] = set()
self.failed_repro: Set[UUID] = set()
# job_id -> Repro
self.repros: Dict[UUID, Repro] = {}
# job_id -> target
self.target_jobs: Dict[UUID, str] = {}
self.test_id = test_id
self.project = f"test-{self.test_id}"
def setup(
self,
*,
region: Optional[Region] = None,
user_pools: Optional[Dict[str, str]] = None,
pool_size: int,
os_list: List[OS],
) -> None:
for entry in self.os:
if user_pools and entry.name in user_pools:
self.logger.info(
"using existing pool: %s:%s", entry.name, user_pools[entry.name]
)
self.pools[entry] = self.of.pools.get(user_pools[entry.name])
else:
name = PoolName("pool-%s-%s" % (self.project, entry.name))
self.logger.info("creating pool: %s:%s", entry.name, name)
self.pools[entry] = self.of.pools.create(name, entry)
self.logger.info("creating scaleset for pool: %s", name)
self.of.scalesets.create(name, self.pool_size, region=region)
for entry in os_list:
name = PoolName(f"testpool-{entry.name}-{self.test_id}")
self.logger.info("creating pool: %s:%s", entry.name, name)
self.pools[entry] = self.of.pools.create(name, entry)
self.logger.info("creating scaleset for pool: %s", name)
self.of.scalesets.create(name, pool_size, region=region)
def launch(self, path: str) -> None:
def launch(
self, path: Directory, *, os_list: List[OS], targets: List[str], duration: int = 1
) -> None:
""" Launch all of the fuzzing templates """
for target, config in TARGETS.items():
if target not in self.targets:
if target not in targets:
continue
if config.os not in self.os:
if config.os not in os_list:
continue
self.logger.info("launching: %s", target)
@@ -230,7 +206,7 @@ class TestOnefuzz:
target_exe=target_exe,
inputs=inputs,
setup_dir=setup,
duration=1,
duration=duration,
vm_count=1,
reboot_after_setup=config.reboot_after_setup or False,
)
@@ -245,7 +221,7 @@ class TestOnefuzz:
target_harness=config.target_exe,
inputs=inputs,
setup_dir=setup,
duration=1,
duration=duration,
vm_count=1,
)
elif config.template == TemplateType.libfuzzer_qemu_user:
@@ -256,7 +232,7 @@ class TestOnefuzz:
self.pools[config.os].name,
inputs=inputs,
target_exe=target_exe,
duration=1,
duration=duration,
vm_count=1,
)
elif config.template == TemplateType.radamsa:
@@ -270,7 +246,7 @@ class TestOnefuzz:
setup_dir=setup,
check_asan_log=config.check_asan_log or False,
disable_check_debugger=config.disable_check_debugger or False,
duration=1,
duration=duration,
vm_count=1,
)
elif config.template == TemplateType.afl:
@@ -282,7 +258,7 @@ class TestOnefuzz:
target_exe=target_exe,
inputs=inputs,
setup_dir=setup,
duration=1,
duration=duration,
vm_count=1,
)
else:
@@ -291,21 +267,9 @@ class TestOnefuzz:
if not job:
raise Exception("missing job")
self.containers[job.job_id] = []
for task in self.of.tasks.list(job_id=job.job_id):
self.tasks[task.task_id] = job.job_id
self.containers[job.job_id] += [
ContainerWrapper(self.of.containers.get(x.name).sas_url)
for x in task.config.containers
if x.type in TARGETS[job.config.name].wait_for_files
]
self.jobs[job.job_id] = job
self.job_os[job.job_id] = config.os
self.target_jobs[job.job_id] = target
def check_task(self, task_id: UUID, scalesets: List[Scaleset]) -> Optional[str]:
task = self.of.tasks.get(task_id)
def check_task(
self, job: Job, task: Task, scalesets: List[Scaleset]
) -> TaskTestState:
# Check if the scaleset the task is assigned is OK
for scaleset in scalesets:
if (
@@ -313,279 +277,335 @@ class TestOnefuzz:
and scaleset.pool_name == task.config.pool.pool_name
and scaleset.state not in scaleset.state.available()
):
return "task scaleset failed: %s - %s - %s (%s)" % (
self.jobs[self.tasks[task_id]].config.name,
self.logger.error(
"task scaleset failed: %s - %s - %s (%s)",
job.config.name,
task.config.task.type.name,
scaleset.state.name,
scaleset.error,
)
return TaskTestState.failed
task = self.of.tasks.get(task.task_id)
# check if the task itself has an error
if task.error is not None:
return "task failed: %s - %s - %s (%s)" % (
task_id,
self.jobs[self.tasks[task_id]].config.name,
self.logger.error(
"task failed: %s - %s (%s)",
job.config.name,
task.config.task.type.name,
task.error,
)
return TaskTestState.failed
# just in case someone else stopped the task
if task.state in TaskState.shutting_down():
return "task shutdown early: %s - %s" % (
self.jobs[self.tasks[task_id]].config.name,
task.config.task.type.name,
)
return None
if task.state in [TaskState.stopped, TaskState.stopping]:
return TaskTestState.stopped
if task.state == TaskState.running:
return TaskTestState.running
return TaskTestState.not_running
def check_jobs(
self, poll: bool = False, stop_on_complete_check: bool = False
) -> bool:
""" Check all of the integration jobs """
jobs: Dict[UUID, Job] = {x.job_id: x for x in self.get_jobs()}
job_tasks: Dict[UUID, List[Task]] = {}
check_containers: Dict[UUID, Dict[Container, Tuple[ContainerWrapper, int]]] = {}
for job in jobs.values():
if job.config.name not in TARGETS:
self.logger.error("unknown job target: %s", job.config.name)
continue
tasks = self.of.jobs.tasks.list(job.job_id)
job_tasks[job.job_id] = tasks
check_containers[job.job_id] = {}
for task in tasks:
for container in task.config.containers:
if container.type in TARGETS[job.config.name].wait_for_files:
count = TARGETS[job.config.name].wait_for_files[container.type]
check_containers[job.job_id][container.name] = (
ContainerWrapper(
self.of.containers.get(container.name).sas_url
),
count,
)
self.success = True
self.logger.info("checking %d jobs", len(jobs))
def check_jobs_impl(
self,
) -> Tuple[bool, str, bool]:
self.cleared = False
def clear() -> None:
if not self.cleared:
self.cleared = True
print("")
if poll:
print("")
if self.jobs:
finished_job: Set[UUID] = set()
def check_jobs_impl() -> Tuple[bool, str, bool]:
self.cleared = False
failed_jobs: Set[UUID] = set()
job_task_states: Dict[UUID, Set[TaskTestState]] = {}
# check all the containers we care about for the job
for job_id in self.containers:
done: Set[ContainerWrapper] = set()
for container in self.containers[job_id]:
if len(container.list_blobs()) > 0:
for job_id in check_containers:
finished_containers: Set[Container] = set()
for (container_name, container_impl) in check_containers[
job_id
].items():
container_client, count = container_impl
if len(container_client.list_blobs()) >= count:
clear()
self.logger.info(
"new files in: %s", container.client.container_name
"found files for %s - %s",
jobs[job_id].config.name,
container_name,
)
done.add(container)
for container in done:
self.containers[job_id].remove(container)
if not self.containers[job_id]:
clear()
self.logger.info("finished: %s", self.jobs[job_id].config.name)
finished_job.add(job_id)
finished_containers.add(container_name)
# check all the tasks associated with the job
if self.tasks:
scalesets = self.of.scalesets.list()
for task_id in self.tasks:
error = self.check_task(task_id, scalesets)
if error is not None:
for container_name in finished_containers:
del check_containers[job_id][container_name]
scalesets = self.of.scalesets.list()
for job_id in job_tasks:
finished_tasks: Set[UUID] = set()
job_task_states[job_id] = set()
for task in job_tasks[job_id]:
if job_id not in jobs:
continue
task_result = self.check_task(jobs[job_id], task, scalesets)
if task_result == TaskTestState.failed:
self.success = False
failed_jobs.add(job_id)
elif task_result == TaskTestState.stopped:
finished_tasks.add(task.task_id)
else:
job_task_states[job_id].add(task_result)
job_tasks[job_id] = [
x for x in job_tasks[job_id] if x.task_id not in finished_tasks
]
to_remove: Set[UUID] = set()
for job in jobs.values():
# stop tracking failed jobs
if job.job_id in failed_jobs:
if job.job_id in check_containers:
del check_containers[job.job_id]
if job.job_id in job_tasks:
del job_tasks[job.job_id]
continue
# stop checking containers once all the containers for the job
# have checked out.
if job.job_id in check_containers:
if not check_containers[job.job_id]:
clear()
self.logger.error(error)
finished_job.add(self.tasks[task_id])
self.failed_jobs.add(self.tasks[task_id])
self.logger.info(
"found files in all containers for %s", job.config.name
)
del check_containers[job.job_id]
# cleanup jobs that are done testing
for job_id in finished_job:
self.stop_template(
self.jobs[job_id].config.name, delete_containers=False
)
if job.job_id not in check_containers:
if job.job_id in job_task_states:
if set([TaskTestState.running]).issuperset(
job_task_states[job.job_id]
):
del job_tasks[job.job_id]
for task_id, task_job_id in list(self.tasks.items()):
if job_id == task_job_id:
del self.tasks[task_id]
if job.job_id not in job_tasks and job.job_id not in check_containers:
clear()
self.logger.info("%s completed", job.config.name)
to_remove.add(job.job_id)
if job_id in self.jobs:
self.successful_jobs.add(job_id)
del self.jobs[job_id]
for job_id in to_remove:
if stop_on_complete_check:
self.stop_job(jobs[job_id])
del jobs[job_id]
if job_id in self.containers:
del self.containers[job_id]
msg = "waiting on: %s" % ",".join(
sorted(x.config.name for x in jobs.values())
)
if poll and len(msg) > 80:
msg = "waiting on %d jobs" % len(jobs)
msg = "waiting on: %s" % ",".join(
sorted(x.config.name for x in self.jobs.values())
)
if len(msg) > 80:
msg = "waiting on %d jobs" % len(self.jobs)
if not jobs:
msg = "done all tasks"
return (
not bool(self.jobs),
msg,
not bool(self.failed_jobs),
)
return (not bool(jobs), msg, self.success)
def check_jobs(self) -> bool:
""" Check all of the integration jobs """
self.logger.info("checking jobs")
return wait(self.check_jobs_impl)
def get_job_crash(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
# get the crash container for a given job
if poll:
return wait(check_jobs_impl)
else:
_, msg, result = check_jobs_impl()
self.logger.info(msg)
return result
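check_jobs_impl is written against the polling contract of onefuzz.backend.wait as consumed here: the callable returns a (done, message, success) tuple, and wait loops until done is truthy, then reports success. A hypothetical stand-in capturing that assumed behavior (the poll interval is an assumption):

    from typing import Callable, Tuple
    import time

    def wait_sketch(func: Callable[[], Tuple[bool, str, bool]]) -> bool:
        while True:
            done, msg, success = func()
            if done:
                return success
            print(msg, end="\r")
            time.sleep(10)  # assumed poll interval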
def get_job_crash_report(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
for task in self.of.tasks.list(job_id=job_id, state=None):
for container in task.config.containers:
if container.type != ContainerType.unique_reports:
if container.type not in [
ContainerType.unique_reports,
ContainerType.reports,
]:
continue
files = self.of.containers.files.list(container.name)
if len(files.files) > 0:
return (container.name, files.files[0])
return None
def launch_repro(self) -> None:
def launch_repro(self) -> Tuple[bool, Dict[UUID, Tuple[Job, Repro]]]:
# launch repro for one report from all successful jobs
has_cdb = bool(which("cdb.exe"))
has_gdb = bool(which("gdb"))
for job_id in self.successful_jobs:
if not TARGETS[self.target_jobs[job_id]].test_repro:
self.logger.info("skipping repro for %s", self.target_jobs[job_id])
jobs = self.get_jobs()
result = True
repros = {}
for job in jobs:
if not TARGETS[job.config.name].test_repro:
self.logger.info("not testing repro for %s", job.config.name)
continue
if self.job_os[job_id] == OS.linux and not has_gdb:
if TARGETS[job.config.name].os == OS.linux and not has_gdb:
self.logger.warning(
"missing gdb in path, not launching repro: %s",
self.target_jobs[job_id],
"skipping repro for %s, missing gdb", job.config.name
)
continue
if self.job_os[job_id] == OS.windows and not has_cdb:
if TARGETS[job.config.name].os == OS.windows and not has_cdb:
self.logger.warning(
"missing cdb in path, not launching repro: %s",
self.target_jobs[job_id],
"skipping repro for %s, missing cdb", job.config.name
)
continue
self.logger.info("launching repro: %s", self.target_jobs[job_id])
report = self.get_job_crash(job_id)
report = self.get_job_crash_report(job.job_id)
if report is None:
self.logger.warning(
"target does not include crash reports: %s",
self.target_jobs[job_id],
)
return
(container, path) = report
self.repros[job_id] = self.of.repro.create(container, path, duration=1)
def check_repro_impl(
self,
) -> Tuple[bool, str, bool]:
# check all of the launched repros
self.cleared = False
def clear() -> None:
if not self.cleared:
self.cleared = True
print("")
commands: Dict[OS, Tuple[str, str]] = {
OS.windows: ("r rip", r"^rip=[a-f0-9]{16}"),
OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
}
info: Dict[str, List[str]] = {}
done: Set[UUID] = set()
for job_id, repro in self.repros.items():
repro = self.of.repro.get(repro.vm_id)
if repro.error:
clear()
self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], repro.error
"target does not include crash reports: %s", job.config.name
)
self.failed_jobs.add(job_id)
done.add(job_id)
elif repro.state not in [VmState.init, VmState.extensions_launch]:
done.add(job_id)
try:
result = self.of.repro.connect(
repro.vm_id,
delete_after_use=True,
debug_command=commands[repro.os][0],
)
if result is not None and re.search(
commands[repro.os][1], result, re.MULTILINE
):
clear()
self.logger.info(
"repro succeeded: %s", self.target_jobs[job_id]
)
self.failed_jobs.add(job_id)
done.add(job_id)
else:
clear()
self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], result
)
self.failed_jobs.add(job_id)
done.add(job_id)
except Exception as e:
result = False
else:
self.logger.info("launching repro: %s", job.config.name)
(container, path) = report
repro = self.of.repro.create(container, path, duration=1)
repros[job.job_id] = (job, repro)
return (result, repros)
def check_repro(self, repros: Dict[UUID, Tuple[Job, Repro]]) -> bool:
self.logger.info("checking repros")
self.success = True
def check_repro_impl() -> Tuple[bool, str, bool]:
# check all of the launched repros
self.cleared = False
def clear() -> None:
if not self.cleared:
self.cleared = True
print("")
commands: Dict[OS, Tuple[str, str]] = {
OS.windows: ("r rip", r"^rip=[a-f0-9]{16}"),
OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
}
for (job, repro) in list(repros.values()):
repros[job.job_id] = (job, self.of.repro.get(repro.vm_id))
for (job, repro) in list(repros.values()):
if repro.error:
clear()
self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], repr(e)
"repro failed: %s: %s",
job.config.name,
repro.error,
)
self.failed_jobs.add(job_id)
done.add(job_id)
else:
if repro.state.name not in info:
info[repro.state.name] = []
info[repro.state.name].append(self.target_jobs[job_id])
self.of.repro.delete(repro.vm_id)
del repros[job.job_id]
elif repro.state == VmState.running:
try:
result = self.of.repro.connect(
repro.vm_id,
delete_after_use=True,
debug_command=commands[repro.os][0],
)
if result is not None and re.search(
commands[repro.os][1], result, re.MULTILINE
):
clear()
self.logger.info("repro succeeded: %s", job.config.name)
else:
clear()
self.logger.error(
"repro failed: %s - %s", job.config.name, result
)
except Exception as err:
clear()
self.logger.error("repro failed: %s - %s", job.config.name, err)
del repros[job.job_id]
elif repro.state not in [VmState.init, VmState.extensions_launch]:
self.logger.error(
"repro failed: %s - bad state: %s", job.config.name, repro.state
)
del repros[job.job_id]
for job_id in done:
self.of.repro.delete(self.repros[job_id].vm_id)
del self.repros[job_id]
repro_states: Dict[str, List[str]] = {}
for (job, repro) in repros.values():
if repro.state.name not in repro_states:
repro_states[repro.state.name] = []
repro_states[repro.state.name].append(job.config.name)
logline = []
for name in info:
logline.append("%s:%s" % (name, ",".join(info[name])))
logline = []
for state in repro_states:
logline.append("%s:%s" % (state, ",".join(repro_states[state])))
msg = "waiting repro: %s" % " ".join(logline)
if len(logline) > 80:
msg = "waiting on %d repros" % len(self.repros)
msg = "waiting repro: %s" % " ".join(logline)
if len(msg) > 80:
msg = "waiting on %d repros" % len(repros)
return (not bool(repros), msg, self.success)
return (
not bool(self.repros),
msg,
bool(self.failed_jobs),
return wait(check_repro_impl)
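The repro check proves the debugger attached at the crash by printing the instruction pointer and matching the output against the OS-specific patterns above. A self-contained sketch of that verification (the sample output is fabricated for illustration):

    import re

    sample = "rip=00007ff6d4a1b2c3"  # hypothetical cdb `r rip` output
    assert re.search(r"^rip=[a-f0-9]{16}", sample, re.MULTILINE)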
def get_jobs(self) -> List[Job]:
jobs = self.of.jobs.list(job_state=None)
jobs = [x for x in jobs if x.config.project == self.project]
return jobs
def stop_job(self, job: Job, delete_containers: bool = False) -> None:
self.of.template.stop(
job.config.project,
job.config.name,
BUILD,
delete_containers=delete_containers,
)
def check_repro(self) -> bool:
self.logger.info("checking repros")
return wait(self.check_repro_impl)
def get_pools(self) -> List[Pool]:
pools = self.of.pools.list()
pools = [x for x in pools if x.name == f"testpool-{x.os.name}-{self.test_id}"]
return pools
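Because jobs and pools are named after the test_id, a later invocation in a fresh process can rediscover everything without shared in-memory state; a brief sketch:

    tester = TestOnefuzz(onefuzz, logger, test_id)  # fresh process, same test_id
    jobs = tester.get_jobs()    # filtered on project == f"test-{test_id}"
    pools = tester.get_pools()  # filtered on f"testpool-{os}-{test_id}"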
def stop_template(self, target: str, delete_containers: bool = True) -> None:
""" stop a specific template """
if self.skip_cleanup:
self.logger.warning("not cleaning up target: %s", target)
else:
self.of.template.stop(
self.project,
target,
BUILD,
delete_containers=delete_containers,
stop_notifications=True,
)
def cleanup(self, *, user_pools: Optional[Dict[str, str]] = None) -> bool:
def cleanup(self) -> None:
""" cleanup all of the integration pools & jobs """
if self.skip_cleanup:
self.logger.warning("not cleaning up")
return True
self.logger.info("cleaning up")
errors: List[Exception] = []
for target, config in TARGETS.items():
if config.os not in self.os:
continue
if target not in self.targets:
continue
jobs = self.get_jobs()
for job in jobs:
try:
self.logger.info("stopping %s", target)
self.stop_template(target, delete_containers=False)
self.stop_job(job, delete_containers=True)
except Exception as e:
self.logger.error("cleanup of %s failed", target)
self.logger.error("cleanup of job failed: %s - %s", job, e)
errors.append(e)
for pool in self.pools.values():
if user_pools and pool.name in user_pools.values():
continue
for pool in self.get_pools():
self.logger.info(
"halting: %s:%s:%s", pool.name, pool.os.name, pool.arch.name
)
@@ -595,52 +615,115 @@ class TestOnefuzz:
self.logger.error("cleanup of pool failed: %s - %s", pool.name, e)
errors.append(e)
for repro in self.repros.values():
try:
self.of.repro.delete(repro.vm_id)
except Exception as e:
self.logger.error("cleanup of repro failed: %s - %s", repro.vm_id, e)
errors.append(e)
container_names = set()
for job in jobs:
for task in self.of.tasks.list(job_id=job.job_id, state=None):
for container in task.config.containers:
if container.type in [
ContainerType.reports,
ContainerType.unique_reports,
]:
container_names.add(container.name)
return not bool(errors)
for repro in self.of.repro.list():
if repro.config.container in container_names:
try:
self.of.repro.delete(repro.vm_id)
except Exception as e:
self.logger.error("cleanup of repro failed: %s %s", repro.vm_id, e)
errors.append(e)
if errors:
raise Exception("cleanup failed")
class Run(Command):
def check_jobs(
self,
test_id: UUID,
*,
endpoint: Optional[str],
poll: bool = False,
stop_on_complete_check: bool = False,
) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
result = tester.check_jobs(
poll=poll, stop_on_complete_check=stop_on_complete_check
)
if not result:
raise Exception("jobs failed")
def check_repros(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
launch_result, repros = tester.launch_repro()
result = tester.check_repro(repros)
if not (result and launch_result):
raise Exception("repros failed")
def launch(
self,
samples: Directory,
*,
endpoint: Optional[str] = None,
pool_size: int = 10,
region: Optional[Region] = None,
os_list: List[OS] = [OS.linux, OS.windows],
targets: List[str] = list(TARGETS.keys()),
test_id: Optional[UUID] = None,
duration: int = 1,
) -> UUID:
if test_id is None:
test_id = uuid4()
self.logger.info("launching test_id: %s", test_id)
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
tester.setup(region=region, pool_size=pool_size, os_list=os_list)
tester.launch(samples, os_list=os_list, targets=targets, duration=duration)
return test_id
def cleanup(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id=test_id)
tester.cleanup()
def test(
self,
samples: Directory,
*,
endpoint: Optional[str] = None,
user_pools: Optional[Dict[str, str]] = None,
pool_size: int = 10,
pool_size: int = 15,
region: Optional[Region] = None,
os_list: List[OS] = [OS.linux, OS.windows],
targets: List[str] = list(TARGETS.keys()),
skip_repro: bool = False,
skip_cleanup: bool = False,
duration: int = 1,
) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(
self.onefuzz,
self.logger,
pool_size=pool_size,
os_list=os_list,
targets=targets,
skip_cleanup=skip_cleanup,
)
success = True
test_id = uuid4()
error: Optional[Exception] = None
try:
tester.setup(region=region, user_pools=user_pools)
tester.launch(samples)
tester.check_jobs()
self.launch(
samples,
endpoint=endpoint,
pool_size=pool_size,
region=region,
os_list=os_list,
targets=targets,
test_id=test_id,
duration=duration,
)
self.check_jobs(
test_id, endpoint=endpoint, poll=True, stop_on_complete_check=True
)
if skip_repro:
self.logger.warning("not testing crash repro")
else:
self.logger.info("launching crash repro tests")
tester.launch_repro()
tester.check_repro()
self.check_repros(test_id, endpoint=endpoint)
except Exception as e:
self.logger.error("testing failed: %s", repr(e))
error = e
@@ -649,10 +732,11 @@ class Run(Command):
self.logger.error("interrupted testing")
success = False
if not tester.cleanup(user_pools=user_pools):
success = False
if tester.failed_jobs or tester.failed_repro:
try:
self.cleanup(test_id, endpoint=endpoint)
except Exception as e:
self.logger.error("testing failed: %s", repr(e))
error = e
success = False
if error: