mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-14 11:08:06 +00:00
enable long-running integration tests (#654)
This commit is contained in:
@ -7,13 +7,13 @@
|
||||
""" Launch multiple templates using samples to verify Onefuzz works end-to-end """
|
||||
|
||||
# NOTE:
|
||||
# 1. This script uses pre-built fuzzing samples from the onefuzz-samples project.
|
||||
# https://github.com/microsoft/onefuzz-samples/releases/latest
|
||||
# 1. This script uses an unpacked version of the `integration-test-results`
|
||||
# from the CI pipeline.
|
||||
#
|
||||
# 2. This script will create new pools & managed scalesets during the testing by
|
||||
# default. To use pre-existing pools, specify `--user_pools os=pool_name`
|
||||
# Check out https://github.com/microsoft/onefuzz/actions/workflows/
|
||||
# ci.yml?query=branch%3Amain+is%3Asuccess
|
||||
#
|
||||
# 3. For each stage, this script launches everything for the stage in batch, then
|
||||
# 2. For each stage, this script launches everything for the stage in batch, then
|
||||
# checks on each of the created items for the stage. This batch processing
|
||||
# allows testing multiple components concurrently.
|
||||
|
||||
@ -30,7 +30,7 @@ from onefuzz.api import Command, Onefuzz
|
||||
from onefuzz.backend import ContainerWrapper, wait
|
||||
from onefuzz.cli import execute_api
|
||||
from onefuzztypes.enums import OS, ContainerType, TaskState, VmState
|
||||
from onefuzztypes.models import Job, Pool, Repro, Scaleset
|
||||
from onefuzztypes.models import Job, Pool, Repro, Scaleset, Task
|
||||
from onefuzztypes.primitives import Container, Directory, File, PoolName, Region
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@ -39,6 +39,13 @@ WINDOWS_POOL = "linux-test"
|
||||
BUILD = "0"
|
||||
|
||||
|
||||
class TaskTestState(Enum):
    """Coarse per-task status used by the integration test.

    `check_task` returns one of these values for every task it inspects.
    """

    # the task is not running, not stopped/stopping, and has not failed
    not_running = "not_running"
    # the task state is `TaskState.running`
    running = "running"
    # the task state is `TaskState.stopped` or `TaskState.stopping`
    stopped = "stopped"
    # the task reported an error, or its scaleset is not in an available state
    failed = "failed"
|
||||
|
||||
|
||||
class TemplateType(Enum):
|
||||
libfuzzer = "libfuzzer"
|
||||
libfuzzer_dotnet = "libfuzzer_dotnet"
|
||||
@ -54,7 +61,7 @@ class Integration(BaseModel):
|
||||
inputs: Optional[str]
|
||||
use_setup: bool = Field(default=False)
|
||||
nested_setup_dir: Optional[str]
|
||||
wait_for_files: List[ContainerType]
|
||||
wait_for_files: Dict[ContainerType, int]
|
||||
check_asan_log: Optional[bool] = Field(default=False)
|
||||
disable_check_debugger: Optional[bool] = Field(default=False)
|
||||
reboot_after_setup: Optional[bool] = Field(default=False)
|
||||
@ -67,14 +74,18 @@ TARGETS: Dict[str, Integration] = {
|
||||
os=OS.linux,
|
||||
target_exe="fuzz.exe",
|
||||
inputs="seeds",
|
||||
wait_for_files=[ContainerType.unique_reports],
|
||||
wait_for_files={ContainerType.unique_reports: 1},
|
||||
),
|
||||
"linux-libfuzzer": Integration(
|
||||
template=TemplateType.libfuzzer,
|
||||
os=OS.linux,
|
||||
target_exe="fuzz.exe",
|
||||
inputs="seeds",
|
||||
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
|
||||
wait_for_files={
|
||||
ContainerType.unique_reports: 1,
|
||||
ContainerType.coverage: 1,
|
||||
ContainerType.inputs: 2,
|
||||
},
|
||||
reboot_after_setup=True,
|
||||
),
|
||||
"linux-libfuzzer-dotnet": Integration(
|
||||
@ -84,7 +95,8 @@ TARGETS: Dict[str, Integration] = {
|
||||
nested_setup_dir="my-fuzzer",
|
||||
inputs="inputs",
|
||||
use_setup=True,
|
||||
wait_for_files=[ContainerType.inputs, ContainerType.crashes],
|
||||
wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
|
||||
test_repro=False,
|
||||
),
|
||||
"linux-libfuzzer-aarch64-crosscompile": Integration(
|
||||
template=TemplateType.libfuzzer_qemu_user,
|
||||
@ -92,28 +104,28 @@ TARGETS: Dict[str, Integration] = {
|
||||
target_exe="fuzz.exe",
|
||||
inputs="inputs",
|
||||
use_setup=True,
|
||||
wait_for_files=[ContainerType.inputs, ContainerType.crashes],
|
||||
wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
|
||||
test_repro=False,
|
||||
),
|
||||
"linux-libfuzzer-rust": Integration(
|
||||
template=TemplateType.libfuzzer,
|
||||
os=OS.linux,
|
||||
target_exe="fuzz_target_1",
|
||||
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
|
||||
wait_for_files={ContainerType.unique_reports: 1, ContainerType.coverage: 1},
|
||||
),
|
||||
"linux-trivial-crash": Integration(
|
||||
template=TemplateType.radamsa,
|
||||
os=OS.linux,
|
||||
target_exe="fuzz.exe",
|
||||
inputs="seeds",
|
||||
wait_for_files=[ContainerType.unique_reports],
|
||||
wait_for_files={ContainerType.unique_reports: 1},
|
||||
),
|
||||
"linux-trivial-crash-asan": Integration(
|
||||
template=TemplateType.radamsa,
|
||||
os=OS.linux,
|
||||
target_exe="fuzz.exe",
|
||||
inputs="seeds",
|
||||
wait_for_files=[ContainerType.unique_reports],
|
||||
wait_for_files={ContainerType.unique_reports: 1},
|
||||
check_asan_log=True,
|
||||
disable_check_debugger=True,
|
||||
),
|
||||
@ -122,89 +134,53 @@ TARGETS: Dict[str, Integration] = {
|
||||
os=OS.windows,
|
||||
target_exe="fuzz.exe",
|
||||
inputs="seeds",
|
||||
wait_for_files=[
|
||||
ContainerType.unique_reports,
|
||||
ContainerType.coverage,
|
||||
],
|
||||
wait_for_files={
|
||||
ContainerType.inputs: 2,
|
||||
ContainerType.unique_reports: 1,
|
||||
ContainerType.coverage: 1,
|
||||
},
|
||||
),
|
||||
"windows-trivial-crash": Integration(
|
||||
template=TemplateType.radamsa,
|
||||
os=OS.windows,
|
||||
target_exe="fuzz.exe",
|
||||
inputs="seeds",
|
||||
wait_for_files=[ContainerType.unique_reports],
|
||||
wait_for_files={ContainerType.unique_reports: 1},
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
class TestOnefuzz:
|
||||
def __init__(
|
||||
self,
|
||||
onefuzz: Onefuzz,
|
||||
logger: logging.Logger,
|
||||
*,
|
||||
pool_size: int,
|
||||
os_list: List[OS],
|
||||
targets: List[str],
|
||||
skip_cleanup: bool,
|
||||
) -> None:
|
||||
def __init__(self, onefuzz: Onefuzz, logger: logging.Logger, test_id: UUID) -> None:
|
||||
self.of = onefuzz
|
||||
self.logger = logger
|
||||
self.pools: Dict[OS, Pool] = {}
|
||||
self.project = "test-" + str(uuid4()).split("-")[0]
|
||||
self.pool_size = pool_size
|
||||
self.os = os_list
|
||||
self.targets = targets
|
||||
self.skip_cleanup = skip_cleanup
|
||||
|
||||
# job_id -> Job
|
||||
self.jobs: Dict[UUID, Job] = {}
|
||||
|
||||
# job_id -> List[container_url]
|
||||
self.containers: Dict[UUID, List[ContainerWrapper]] = {}
|
||||
|
||||
# task_id -> job_id
|
||||
self.tasks: Dict[UUID, UUID] = {}
|
||||
|
||||
self.job_os: Dict[UUID, OS] = {}
|
||||
|
||||
self.successful_jobs: Set[UUID] = set()
|
||||
self.failed_jobs: Set[UUID] = set()
|
||||
self.failed_repro: Set[UUID] = set()
|
||||
|
||||
# job_id -> Repro
|
||||
self.repros: Dict[UUID, Repro] = {}
|
||||
|
||||
# job_id -> target
|
||||
self.target_jobs: Dict[UUID, str] = {}
|
||||
self.test_id = test_id
|
||||
self.project = f"test-{self.test_id}"
|
||||
|
||||
def setup(
|
||||
self,
|
||||
*,
|
||||
region: Optional[Region] = None,
|
||||
user_pools: Optional[Dict[str, str]] = None,
|
||||
pool_size: int,
|
||||
os_list: List[OS],
|
||||
) -> None:
|
||||
for entry in self.os:
|
||||
if user_pools and entry.name in user_pools:
|
||||
self.logger.info(
|
||||
"using existing pool: %s:%s", entry.name, user_pools[entry.name]
|
||||
)
|
||||
self.pools[entry] = self.of.pools.get(user_pools[entry.name])
|
||||
else:
|
||||
name = PoolName("pool-%s-%s" % (self.project, entry.name))
|
||||
self.logger.info("creating pool: %s:%s", entry.name, name)
|
||||
self.pools[entry] = self.of.pools.create(name, entry)
|
||||
self.logger.info("creating scaleset for pool: %s", name)
|
||||
self.of.scalesets.create(name, self.pool_size, region=region)
|
||||
for entry in os_list:
|
||||
name = PoolName(f"testpool-{entry.name}-{self.test_id}")
|
||||
self.logger.info("creating pool: %s:%s", entry.name, name)
|
||||
self.pools[entry] = self.of.pools.create(name, entry)
|
||||
self.logger.info("creating scaleset for pool: %s", name)
|
||||
self.of.scalesets.create(name, pool_size, region=region)
|
||||
|
||||
def launch(self, path: str) -> None:
|
||||
def launch(
|
||||
self, path: Directory, *, os_list: List[OS], targets: List[str], duration=int
|
||||
) -> None:
|
||||
""" Launch all of the fuzzing templates """
|
||||
|
||||
for target, config in TARGETS.items():
|
||||
if target not in self.targets:
|
||||
if target not in targets:
|
||||
continue
|
||||
|
||||
if config.os not in self.os:
|
||||
if config.os not in os_list:
|
||||
continue
|
||||
|
||||
self.logger.info("launching: %s", target)
|
||||
@ -230,7 +206,7 @@ class TestOnefuzz:
|
||||
target_exe=target_exe,
|
||||
inputs=inputs,
|
||||
setup_dir=setup,
|
||||
duration=1,
|
||||
duration=duration,
|
||||
vm_count=1,
|
||||
reboot_after_setup=config.reboot_after_setup or False,
|
||||
)
|
||||
@ -245,7 +221,7 @@ class TestOnefuzz:
|
||||
target_harness=config.target_exe,
|
||||
inputs=inputs,
|
||||
setup_dir=setup,
|
||||
duration=1,
|
||||
duration=duration,
|
||||
vm_count=1,
|
||||
)
|
||||
elif config.template == TemplateType.libfuzzer_qemu_user:
|
||||
@ -256,7 +232,7 @@ class TestOnefuzz:
|
||||
self.pools[config.os].name,
|
||||
inputs=inputs,
|
||||
target_exe=target_exe,
|
||||
duration=1,
|
||||
duration=duration,
|
||||
vm_count=1,
|
||||
)
|
||||
elif config.template == TemplateType.radamsa:
|
||||
@ -270,7 +246,7 @@ class TestOnefuzz:
|
||||
setup_dir=setup,
|
||||
check_asan_log=config.check_asan_log or False,
|
||||
disable_check_debugger=config.disable_check_debugger or False,
|
||||
duration=1,
|
||||
duration=duration,
|
||||
vm_count=1,
|
||||
)
|
||||
elif config.template == TemplateType.afl:
|
||||
@ -282,7 +258,7 @@ class TestOnefuzz:
|
||||
target_exe=target_exe,
|
||||
inputs=inputs,
|
||||
setup_dir=setup,
|
||||
duration=1,
|
||||
duration=duration,
|
||||
vm_count=1,
|
||||
)
|
||||
else:
|
||||
@ -291,21 +267,9 @@ class TestOnefuzz:
|
||||
if not job:
|
||||
raise Exception("missing job")
|
||||
|
||||
self.containers[job.job_id] = []
|
||||
for task in self.of.tasks.list(job_id=job.job_id):
|
||||
self.tasks[task.task_id] = job.job_id
|
||||
self.containers[job.job_id] += [
|
||||
ContainerWrapper(self.of.containers.get(x.name).sas_url)
|
||||
for x in task.config.containers
|
||||
if x.type in TARGETS[job.config.name].wait_for_files
|
||||
]
|
||||
self.jobs[job.job_id] = job
|
||||
self.job_os[job.job_id] = config.os
|
||||
self.target_jobs[job.job_id] = target
|
||||
|
||||
def check_task(self, task_id: UUID, scalesets: List[Scaleset]) -> Optional[str]:
|
||||
task = self.of.tasks.get(task_id)
|
||||
|
||||
def check_task(
|
||||
self, job: Job, task: Task, scalesets: List[Scaleset]
|
||||
) -> TaskTestState:
|
||||
# Check if the scaleset the task is assigned is OK
|
||||
for scaleset in scalesets:
|
||||
if (
|
||||
@ -313,279 +277,335 @@ class TestOnefuzz:
|
||||
and scaleset.pool_name == task.config.pool.pool_name
|
||||
and scaleset.state not in scaleset.state.available()
|
||||
):
|
||||
return "task scaleset failed: %s - %s - %s (%s)" % (
|
||||
self.jobs[self.tasks[task_id]].config.name,
|
||||
self.logger.error(
|
||||
"task scaleset failed: %s - %s - %s (%s)",
|
||||
job.config.name,
|
||||
task.config.task.type.name,
|
||||
scaleset.state.name,
|
||||
scaleset.error,
|
||||
)
|
||||
return TaskTestState.failed
|
||||
|
||||
task = self.of.tasks.get(task.task_id)
|
||||
|
||||
# check if the task itself has an error
|
||||
if task.error is not None:
|
||||
return "task failed: %s - %s - %s (%s)" % (
|
||||
task_id,
|
||||
self.jobs[self.tasks[task_id]].config.name,
|
||||
self.logger.error(
|
||||
"task failed: %s - %s (%s)",
|
||||
job.config.name,
|
||||
task.config.task.type.name,
|
||||
task.error,
|
||||
)
|
||||
return TaskTestState.failed
|
||||
|
||||
# just in case someone else stopped the task
|
||||
if task.state in TaskState.shutting_down():
|
||||
return "task shutdown early: %s - %s" % (
|
||||
self.jobs[self.tasks[task_id]].config.name,
|
||||
task.config.task.type.name,
|
||||
)
|
||||
return None
|
||||
if task.state in [TaskState.stopped, TaskState.stopping]:
|
||||
return TaskTestState.stopped
|
||||
|
||||
if task.state == TaskState.running:
|
||||
return TaskTestState.running
|
||||
|
||||
return TaskTestState.not_running
|
||||
|
||||
def check_jobs(
|
||||
self, poll: bool = False, stop_on_complete_check: bool = False
|
||||
) -> bool:
|
||||
""" Check all of the integration jobs """
|
||||
jobs: Dict[UUID, Job] = {x.job_id: x for x in self.get_jobs()}
|
||||
job_tasks: Dict[UUID, List[Task]] = {}
|
||||
check_containers: Dict[UUID, Dict[Container, Tuple[ContainerWrapper, int]]] = {}
|
||||
|
||||
for job in jobs.values():
|
||||
if job.config.name not in TARGETS:
|
||||
self.logger.error("unknown job target: %s", job.config.name)
|
||||
continue
|
||||
|
||||
tasks = self.of.jobs.tasks.list(job.job_id)
|
||||
job_tasks[job.job_id] = tasks
|
||||
check_containers[job.job_id] = {}
|
||||
for task in tasks:
|
||||
for container in task.config.containers:
|
||||
if container.type in TARGETS[job.config.name].wait_for_files:
|
||||
count = TARGETS[job.config.name].wait_for_files[container.type]
|
||||
check_containers[job.job_id][container.name] = (
|
||||
ContainerWrapper(
|
||||
self.of.containers.get(container.name).sas_url
|
||||
),
|
||||
count,
|
||||
)
|
||||
|
||||
self.success = True
|
||||
self.logger.info("checking %d jobs", len(jobs))
|
||||
|
||||
def check_jobs_impl(
|
||||
self,
|
||||
) -> Tuple[bool, str, bool]:
|
||||
self.cleared = False
|
||||
|
||||
def clear() -> None:
|
||||
if not self.cleared:
|
||||
self.cleared = True
|
||||
print("")
|
||||
if poll:
|
||||
print("")
|
||||
|
||||
if self.jobs:
|
||||
finished_job: Set[UUID] = set()
|
||||
def check_jobs_impl() -> Tuple[bool, str, bool]:
|
||||
self.cleared = False
|
||||
failed_jobs: Set[UUID] = set()
|
||||
job_task_states: Dict[UUID, Set[TaskTestState]] = {}
|
||||
|
||||
# check all the containers we care about for the job
|
||||
for job_id in self.containers:
|
||||
done: Set[ContainerWrapper] = set()
|
||||
for container in self.containers[job_id]:
|
||||
if len(container.list_blobs()) > 0:
|
||||
for job_id in check_containers:
|
||||
finished_containers: Set[Container] = set()
|
||||
for (container_name, container_impl) in check_containers[
|
||||
job_id
|
||||
].items():
|
||||
container_client, count = container_impl
|
||||
if len(container_client.list_blobs()) >= count:
|
||||
clear()
|
||||
self.logger.info(
|
||||
"new files in: %s", container.client.container_name
|
||||
"found files for %s - %s",
|
||||
jobs[job_id].config.name,
|
||||
container_name,
|
||||
)
|
||||
done.add(container)
|
||||
for container in done:
|
||||
self.containers[job_id].remove(container)
|
||||
if not self.containers[job_id]:
|
||||
clear()
|
||||
self.logger.info("finished: %s", self.jobs[job_id].config.name)
|
||||
finished_job.add(job_id)
|
||||
finished_containers.add(container_name)
|
||||
|
||||
# check all the tasks associated with the job
|
||||
if self.tasks:
|
||||
scalesets = self.of.scalesets.list()
|
||||
for task_id in self.tasks:
|
||||
error = self.check_task(task_id, scalesets)
|
||||
if error is not None:
|
||||
for container_name in finished_containers:
|
||||
del check_containers[job_id][container_name]
|
||||
|
||||
scalesets = self.of.scalesets.list()
|
||||
for job_id in job_tasks:
|
||||
finished_tasks: Set[UUID] = set()
|
||||
job_task_states[job_id] = set()
|
||||
|
||||
for task in job_tasks[job_id]:
|
||||
if job_id not in jobs:
|
||||
continue
|
||||
|
||||
task_result = self.check_task(jobs[job_id], task, scalesets)
|
||||
if task_result == TaskTestState.failed:
|
||||
self.success = False
|
||||
failed_jobs.add(job_id)
|
||||
elif task_result == TaskTestState.stopped:
|
||||
finished_tasks.add(task.task_id)
|
||||
else:
|
||||
job_task_states[job_id].add(task_result)
|
||||
job_tasks[job_id] = [
|
||||
x for x in job_tasks[job_id] if x.task_id not in finished_tasks
|
||||
]
|
||||
|
||||
to_remove: Set[UUID] = set()
|
||||
for job in jobs.values():
|
||||
# stop tracking failed jobs
|
||||
if job.job_id in failed_jobs:
|
||||
if job.job_id in check_containers:
|
||||
del check_containers[job.job_id]
|
||||
if job.job_id in job_tasks:
|
||||
del job_tasks[job.job_id]
|
||||
continue
|
||||
|
||||
# stop checking containers once all the containers for the job
|
||||
# have checked out.
|
||||
if job.job_id in check_containers:
|
||||
if not check_containers[job.job_id]:
|
||||
clear()
|
||||
self.logger.error(error)
|
||||
finished_job.add(self.tasks[task_id])
|
||||
self.failed_jobs.add(self.tasks[task_id])
|
||||
self.logger.info(
|
||||
"found files in all containers for %s", job.config.name
|
||||
)
|
||||
del check_containers[job.job_id]
|
||||
|
||||
# cleanup jobs that are done testing
|
||||
for job_id in finished_job:
|
||||
self.stop_template(
|
||||
self.jobs[job_id].config.name, delete_containers=False
|
||||
)
|
||||
if job.job_id not in check_containers:
|
||||
if job.job_id in job_task_states:
|
||||
if set([TaskTestState.running]).issuperset(
|
||||
job_task_states[job.job_id]
|
||||
):
|
||||
del job_tasks[job.job_id]
|
||||
|
||||
for task_id, task_job_id in list(self.tasks.items()):
|
||||
if job_id == task_job_id:
|
||||
del self.tasks[task_id]
|
||||
if job.job_id not in job_tasks and job.job_id not in check_containers:
|
||||
clear()
|
||||
self.logger.info("%s completed", job.config.name)
|
||||
to_remove.add(job.job_id)
|
||||
|
||||
if job_id in self.jobs:
|
||||
self.successful_jobs.add(job_id)
|
||||
del self.jobs[job_id]
|
||||
for job_id in to_remove:
|
||||
if stop_on_complete_check:
|
||||
self.stop_job(jobs[job_id])
|
||||
del jobs[job_id]
|
||||
|
||||
if job_id in self.containers:
|
||||
del self.containers[job_id]
|
||||
msg = "waiting on: %s" % ",".join(
|
||||
sorted(x.config.name for x in jobs.values())
|
||||
)
|
||||
if poll and len(msg) > 80:
|
||||
msg = "waiting on %d jobs" % len(jobs)
|
||||
|
||||
msg = "waiting on: %s" % ",".join(
|
||||
sorted(x.config.name for x in self.jobs.values())
|
||||
)
|
||||
if len(msg) > 80:
|
||||
msg = "waiting on %d jobs" % len(self.jobs)
|
||||
if not jobs:
|
||||
msg = "done all tasks"
|
||||
|
||||
return (
|
||||
not bool(self.jobs),
|
||||
msg,
|
||||
not bool(self.failed_jobs),
|
||||
)
|
||||
return (not bool(jobs), msg, self.success)
|
||||
|
||||
def check_jobs(self) -> bool:
|
||||
""" Check all of the integration jobs """
|
||||
self.logger.info("checking jobs")
|
||||
return wait(self.check_jobs_impl)
|
||||
|
||||
def get_job_crash(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
|
||||
# get the crash container for a given job
|
||||
if poll:
|
||||
return wait(check_jobs_impl)
|
||||
else:
|
||||
_, msg, result = check_jobs_impl()
|
||||
self.logger.info(msg)
|
||||
return result
|
||||
|
||||
def get_job_crash_report(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
|
||||
for task in self.of.tasks.list(job_id=job_id, state=None):
|
||||
for container in task.config.containers:
|
||||
if container.type != ContainerType.unique_reports:
|
||||
if container.type not in [
|
||||
ContainerType.unique_reports,
|
||||
ContainerType.reports,
|
||||
]:
|
||||
continue
|
||||
|
||||
files = self.of.containers.files.list(container.name)
|
||||
if len(files.files) > 0:
|
||||
return (container.name, files.files[0])
|
||||
return None
|
||||
|
||||
def launch_repro(self) -> None:
|
||||
def launch_repro(self) -> Tuple[bool, Dict[UUID, Tuple[Job, Repro]]]:
|
||||
# launch repro for one report from all successful jobs
|
||||
has_cdb = bool(which("cdb.exe"))
|
||||
has_gdb = bool(which("gdb"))
|
||||
for job_id in self.successful_jobs:
|
||||
if not TARGETS[self.target_jobs[job_id]].test_repro:
|
||||
self.logger.info("skipping repro for %s", self.target_jobs[job_id])
|
||||
|
||||
jobs = self.get_jobs()
|
||||
|
||||
result = True
|
||||
repros = {}
|
||||
for job in jobs:
|
||||
if not TARGETS[job.config.name].test_repro:
|
||||
self.logger.info("not testing repro for %s", job.config.name)
|
||||
continue
|
||||
|
||||
if self.job_os[job_id] == OS.linux and not has_gdb:
|
||||
if TARGETS[job.config.name].os == OS.linux and not has_gdb:
|
||||
self.logger.warning(
|
||||
"missing gdb in path, not launching repro: %s",
|
||||
self.target_jobs[job_id],
|
||||
"skipping repro for %s, missing gdb", job.config.name
|
||||
)
|
||||
continue
|
||||
|
||||
if self.job_os[job_id] == OS.windows and not has_cdb:
|
||||
if TARGETS[job.config.name].os == OS.windows and not has_cdb:
|
||||
self.logger.warning(
|
||||
"missing cdb in path, not launching repro: %s",
|
||||
self.target_jobs[job_id],
|
||||
"skipping repro for %s, missing cdb", job.config.name
|
||||
)
|
||||
continue
|
||||
|
||||
self.logger.info("launching repro: %s", self.target_jobs[job_id])
|
||||
report = self.get_job_crash(job_id)
|
||||
report = self.get_job_crash_report(job.job_id)
|
||||
if report is None:
|
||||
self.logger.warning(
|
||||
"target does not include crash reports: %s",
|
||||
self.target_jobs[job_id],
|
||||
)
|
||||
return
|
||||
(container, path) = report
|
||||
self.repros[job_id] = self.of.repro.create(container, path, duration=1)
|
||||
|
||||
def check_repro_impl(
|
||||
self,
|
||||
) -> Tuple[bool, str, bool]:
|
||||
# check all of the launched repros
|
||||
|
||||
self.cleared = False
|
||||
|
||||
def clear() -> None:
|
||||
if not self.cleared:
|
||||
self.cleared = True
|
||||
print("")
|
||||
|
||||
commands: Dict[OS, Tuple[str, str]] = {
|
||||
OS.windows: ("r rip", r"^rip=[a-f0-9]{16}"),
|
||||
OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
|
||||
}
|
||||
|
||||
info: Dict[str, List[str]] = {}
|
||||
|
||||
done: Set[UUID] = set()
|
||||
for job_id, repro in self.repros.items():
|
||||
repro = self.of.repro.get(repro.vm_id)
|
||||
if repro.error:
|
||||
clear()
|
||||
self.logger.error(
|
||||
"repro failed: %s: %s", self.target_jobs[job_id], repro.error
|
||||
"target does not include crash reports: %s", job.config.name
|
||||
)
|
||||
self.failed_jobs.add(job_id)
|
||||
done.add(job_id)
|
||||
elif repro.state not in [VmState.init, VmState.extensions_launch]:
|
||||
done.add(job_id)
|
||||
try:
|
||||
result = self.of.repro.connect(
|
||||
repro.vm_id,
|
||||
delete_after_use=True,
|
||||
debug_command=commands[repro.os][0],
|
||||
)
|
||||
if result is not None and re.search(
|
||||
commands[repro.os][1], result, re.MULTILINE
|
||||
):
|
||||
clear()
|
||||
self.logger.info(
|
||||
"repro succeeded: %s", self.target_jobs[job_id]
|
||||
)
|
||||
self.failed_jobs.add(job_id)
|
||||
done.add(job_id)
|
||||
else:
|
||||
clear()
|
||||
self.logger.error(
|
||||
"repro failed: %s: %s", self.target_jobs[job_id], result
|
||||
)
|
||||
self.failed_jobs.add(job_id)
|
||||
done.add(job_id)
|
||||
except Exception as e:
|
||||
result = False
|
||||
else:
|
||||
self.logger.info("launching repro: %s", job.config.name)
|
||||
(container, path) = report
|
||||
repro = self.of.repro.create(container, path, duration=1)
|
||||
repros[job.job_id] = (job, repro)
|
||||
|
||||
return (result, repros)
|
||||
|
||||
def check_repro(self, repros: Dict[UUID, Tuple[Job, Repro]]) -> bool:
|
||||
self.logger.info("checking repros")
|
||||
self.success = True
|
||||
|
||||
def check_repro_impl() -> Tuple[bool, str, bool]:
|
||||
# check all of the launched repros
|
||||
|
||||
self.cleared = False
|
||||
|
||||
def clear() -> None:
|
||||
if not self.cleared:
|
||||
self.cleared = True
|
||||
print("")
|
||||
|
||||
commands: Dict[OS, Tuple[str, str]] = {
|
||||
OS.windows: ("r rip", r"^rip=[a-f0-9]{16}"),
|
||||
OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
|
||||
}
|
||||
|
||||
for (job, repro) in list(repros.values()):
|
||||
repros[job.job_id] = (job, self.of.repro.get(repro.vm_id))
|
||||
|
||||
for (job, repro) in list(repros.values()):
|
||||
if repro.error:
|
||||
clear()
|
||||
self.logger.error(
|
||||
"repro failed: %s: %s", self.target_jobs[job_id], repr(e)
|
||||
"repro failed: %s: %s",
|
||||
job.config.name,
|
||||
repro.error,
|
||||
)
|
||||
self.failed_jobs.add(job_id)
|
||||
done.add(job_id)
|
||||
else:
|
||||
if repro.state.name not in info:
|
||||
info[repro.state.name] = []
|
||||
info[repro.state.name].append(self.target_jobs[job_id])
|
||||
self.of.repro.delete(repro.vm_id)
|
||||
del repros[job.job_id]
|
||||
elif repro.state == VmState.running:
|
||||
try:
|
||||
result = self.of.repro.connect(
|
||||
repro.vm_id,
|
||||
delete_after_use=True,
|
||||
debug_command=commands[repro.os][0],
|
||||
)
|
||||
if result is not None and re.search(
|
||||
commands[repro.os][1], result, re.MULTILINE
|
||||
):
|
||||
clear()
|
||||
self.logger.info("repro succeeded: %s", job.config.name)
|
||||
else:
|
||||
clear()
|
||||
self.logger.error(
|
||||
"repro failed: %s - %s", job.config.name, result
|
||||
)
|
||||
except Exception as err:
|
||||
clear()
|
||||
self.logger.error("repro failed: %s - %s", job.config.name, err)
|
||||
del repros[job.job_id]
|
||||
elif repro.state not in [VmState.init, VmState.extensions_launch]:
|
||||
self.logger.error(
|
||||
"repro failed: %s - bad state: %s", job.config.name, repro.state
|
||||
)
|
||||
del repros[job.job_id]
|
||||
|
||||
for job_id in done:
|
||||
self.of.repro.delete(self.repros[job_id].vm_id)
|
||||
del self.repros[job_id]
|
||||
repro_states: Dict[str, List[str]] = {}
|
||||
for (job, repro) in repros.values():
|
||||
if repro.state.name not in repro_states:
|
||||
repro_states[repro.state.name] = []
|
||||
repro_states[repro.state.name].append(job.config.name)
|
||||
|
||||
logline = []
|
||||
for name in info:
|
||||
logline.append("%s:%s" % (name, ",".join(info[name])))
|
||||
logline = []
|
||||
for state in repro_states:
|
||||
logline.append("%s:%s" % (state, ",".join(repro_states[state])))
|
||||
|
||||
msg = "waiting repro: %s" % " ".join(logline)
|
||||
if len(logline) > 80:
|
||||
msg = "waiting on %d repros" % len(self.repros)
|
||||
msg = "waiting repro: %s" % " ".join(logline)
|
||||
if len(msg) > 80:
|
||||
msg = "waiting on %d repros" % len(repros)
|
||||
return (not bool(repros), msg, self.success)
|
||||
|
||||
return (
|
||||
not bool(self.repros),
|
||||
msg,
|
||||
bool(self.failed_jobs),
|
||||
return wait(check_repro_impl)
|
||||
|
||||
def get_jobs(self) -> List[Job]:
    """ Return the jobs whose project matches this test run """

    # `job_state=None` is passed explicitly; presumably this disables the
    # service-side state filter so finished jobs are included -- confirm
    # against the SDK.
    return [
        job
        for job in self.of.jobs.list(job_state=None)
        if job.config.project == self.project
    ]
|
||||
|
||||
def stop_job(self, job: Job, delete_containers: bool = False) -> None:
    """ Stop the template-launched job identified by `job` """

    # The job is addressed by its project and name together with the
    # module-level BUILD value; `delete_containers` is forwarded unchanged.
    self.of.template.stop(
        job.config.project,
        job.config.name,
        BUILD,
        delete_containers=delete_containers,
    )
|
||||
|
||||
def check_repro(self) -> bool:
|
||||
self.logger.info("checking repros")
|
||||
return wait(self.check_repro_impl)
|
||||
def get_pools(self) -> List[Pool]:
    """ Return the pools that were created for this test run """

    # Pools are recognised purely by name: `testpool-<os name>-<test_id>`,
    # the same pattern `setup` uses when it creates them.
    return [
        pool
        for pool in self.of.pools.list()
        if pool.name == f"testpool-{pool.os.name}-{self.test_id}"
    ]
|
||||
|
||||
def stop_template(self, target: str, delete_containers: bool = True) -> None:
|
||||
""" stop a specific template """
|
||||
|
||||
if self.skip_cleanup:
|
||||
self.logger.warning("not cleaning up target: %s", target)
|
||||
else:
|
||||
self.of.template.stop(
|
||||
self.project,
|
||||
target,
|
||||
BUILD,
|
||||
delete_containers=delete_containers,
|
||||
stop_notifications=True,
|
||||
)
|
||||
|
||||
def cleanup(self, *, user_pools: Optional[Dict[str, str]] = None) -> bool:
|
||||
def cleanup(self) -> None:
|
||||
""" cleanup all of the integration pools & jobs """
|
||||
|
||||
if self.skip_cleanup:
|
||||
self.logger.warning("not cleaning up")
|
||||
return True
|
||||
|
||||
self.logger.info("cleaning up")
|
||||
errors: List[Exception] = []
|
||||
|
||||
for target, config in TARGETS.items():
|
||||
if config.os not in self.os:
|
||||
continue
|
||||
if target not in self.targets:
|
||||
continue
|
||||
|
||||
jobs = self.get_jobs()
|
||||
for job in jobs:
|
||||
try:
|
||||
self.logger.info("stopping %s", target)
|
||||
self.stop_template(target, delete_containers=False)
|
||||
self.stop_job(job, delete_containers=True)
|
||||
except Exception as e:
|
||||
self.logger.error("cleanup of %s failed", target)
|
||||
self.logger.error("cleanup of job failed: %s - %s", job, e)
|
||||
errors.append(e)
|
||||
|
||||
for pool in self.pools.values():
|
||||
if user_pools and pool.name in user_pools.values():
|
||||
continue
|
||||
|
||||
for pool in self.get_pools():
|
||||
self.logger.info(
|
||||
"halting: %s:%s:%s", pool.name, pool.os.name, pool.arch.name
|
||||
)
|
||||
@ -595,52 +615,115 @@ class TestOnefuzz:
|
||||
self.logger.error("cleanup of pool failed: %s - %s", pool.name, e)
|
||||
errors.append(e)
|
||||
|
||||
for repro in self.repros.values():
|
||||
try:
|
||||
self.of.repro.delete(repro.vm_id)
|
||||
except Exception as e:
|
||||
self.logger.error("cleanup of repro failed: %s - %s", repro.vm_id, e)
|
||||
errors.append(e)
|
||||
container_names = set()
|
||||
for job in jobs:
|
||||
for task in self.of.tasks.list(job_id=job.job_id, state=None):
|
||||
for container in task.config.containers:
|
||||
if container.type in [
|
||||
ContainerType.reports,
|
||||
ContainerType.unique_reports,
|
||||
]:
|
||||
container_names.add(container.name)
|
||||
|
||||
return not bool(errors)
|
||||
for repro in self.of.repro.list():
|
||||
if repro.config.container in container_names:
|
||||
try:
|
||||
self.of.repro.delete(repro.vm_id)
|
||||
except Exception as e:
|
||||
self.logger.error("cleanup of repro failed: %s %s", repro.vm_id, e)
|
||||
errors.append(e)
|
||||
|
||||
if errors:
|
||||
raise Exception("cleanup failed")
|
||||
|
||||
|
||||
class Run(Command):
|
||||
def check_jobs(
    self,
    test_id: UUID,
    *,
    endpoint: Optional[str],
    poll: bool = False,
    stop_on_complete_check: bool = False,
) -> None:
    """ Check the jobs launched for `test_id`, raising if the check fails """

    self.onefuzz.__setup__(endpoint=endpoint)

    # A fresh tester is built from `test_id` alone, so this command can be
    # run independently of the command that launched the jobs.
    tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
    result = tester.check_jobs(
        poll=poll, stop_on_complete_check=stop_on_complete_check
    )
    if not result:
        raise Exception("jobs failed")
|
||||
|
||||
def check_repros(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
    """ Launch and verify crash repros for `test_id`, raising on failure """

    self.onefuzz.__setup__(endpoint=endpoint)
    tester = TestOnefuzz(self.onefuzz, self.logger, test_id)

    # Repros that did launch are still checked even when `launch_result`
    # is false; both results must be truthy for the command to succeed.
    launch_result, repros = tester.launch_repro()
    result = tester.check_repro(repros)
    if not (result and launch_result):
        raise Exception("repros failed")
|
||||
|
||||
def launch(
    self,
    samples: Directory,
    *,
    endpoint: Optional[str] = None,
    pool_size: int = 10,
    region: Optional[Region] = None,
    # NOTE: the two list defaults below are shared objects; this method only
    # forwards them to the tester and never mutates them.
    os_list: List[OS] = [OS.linux, OS.windows],
    targets: List[str] = list(TARGETS.keys()),
    test_id: Optional[UUID] = None,
    duration: int = 1,
) -> UUID:
    """ Create the test pools and launch the selected targets

    Returns the test id (a new `uuid4()` when `test_id` is not supplied),
    which the other commands take to locate the jobs and pools again.
    """

    if test_id is None:
        test_id = uuid4()
    self.logger.info("launching test_id: %s", test_id)

    self.onefuzz.__setup__(endpoint=endpoint)
    tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
    tester.setup(region=region, pool_size=pool_size, os_list=os_list)
    tester.launch(samples, os_list=os_list, targets=targets, duration=duration)
    return test_id
|
||||
|
||||
def cleanup(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
    """ Run the tester's cleanup for the test run identified by `test_id` """

    self.onefuzz.__setup__(endpoint=endpoint)
    tester = TestOnefuzz(self.onefuzz, self.logger, test_id=test_id)
    # Any exception raised by the tester's cleanup propagates to the caller.
    tester.cleanup()
|
||||
|
||||
def test(
|
||||
self,
|
||||
samples: Directory,
|
||||
*,
|
||||
endpoint: Optional[str] = None,
|
||||
user_pools: Optional[Dict[str, str]] = None,
|
||||
pool_size: int = 10,
|
||||
pool_size: int = 15,
|
||||
region: Optional[Region] = None,
|
||||
os_list: List[OS] = [OS.linux, OS.windows],
|
||||
targets: List[str] = list(TARGETS.keys()),
|
||||
skip_repro: bool = False,
|
||||
skip_cleanup: bool = False,
|
||||
duration: int = 1,
|
||||
) -> None:
|
||||
self.onefuzz.__setup__(endpoint=endpoint)
|
||||
tester = TestOnefuzz(
|
||||
self.onefuzz,
|
||||
self.logger,
|
||||
pool_size=pool_size,
|
||||
os_list=os_list,
|
||||
targets=targets,
|
||||
skip_cleanup=skip_cleanup,
|
||||
)
|
||||
success = True
|
||||
|
||||
test_id = uuid4()
|
||||
error: Optional[Exception] = None
|
||||
try:
|
||||
tester.setup(region=region, user_pools=user_pools)
|
||||
tester.launch(samples)
|
||||
tester.check_jobs()
|
||||
self.launch(
|
||||
samples,
|
||||
endpoint=endpoint,
|
||||
pool_size=pool_size,
|
||||
region=region,
|
||||
os_list=os_list,
|
||||
targets=targets,
|
||||
test_id=test_id,
|
||||
duration=duration,
|
||||
)
|
||||
self.check_jobs(
|
||||
test_id, endpoint=endpoint, poll=True, stop_on_complete_check=True
|
||||
)
|
||||
|
||||
if skip_repro:
|
||||
self.logger.warning("not testing crash repro")
|
||||
else:
|
||||
self.logger.info("launching crash repro tests")
|
||||
tester.launch_repro()
|
||||
tester.check_repro()
|
||||
self.check_repros(test_id, endpoint=endpoint)
|
||||
except Exception as e:
|
||||
self.logger.error("testing failed: %s", repr(e))
|
||||
error = e
|
||||
@ -649,10 +732,11 @@ class Run(Command):
|
||||
self.logger.error("interrupted testing")
|
||||
success = False
|
||||
|
||||
if not tester.cleanup(user_pools=user_pools):
|
||||
success = False
|
||||
|
||||
if tester.failed_jobs or tester.failed_repro:
|
||||
try:
|
||||
self.cleanup(test_id, endpoint=endpoint)
|
||||
except Exception as e:
|
||||
self.logger.error("testing failed: %s", repr(e))
|
||||
error = e
|
||||
success = False
|
||||
|
||||
if error:
|
||||
|
Reference in New Issue
Block a user