enable long-running integration tests (#654)

Author: bmc-msft
Date:   2021-03-10 17:03:15 -05:00
Committed-by: GitHub
Parent: f055e73b13
Commit: f6a426cc07

@@ -7,13 +7,13 @@
""" Launch multiple templates using samples to verify Onefuzz works end-to-end """
# NOTE:
# 1. This script uses pre-built fuzzing samples from the onefuzz-samples project.
# https://github.com/microsoft/onefuzz-samples/releases/latest
# 1. This script uses an unpacked version of the `integration-test-results`
# from the CI pipeline.
#
# 2. This script will create new pools & managed scalesets during the testing by
# default. To use pre-existing pools, specify `--user_pools os=pool_name`
# Check out https://github.com/microsoft/onefuzz/actions/workflows/
# ci.yml?query=branch%3Amain+is%3Asuccess
#
# 3. For each stage, this script launches everything for the stage in batch, then
# 2. For each stage, this script launches everything for the stage in batch, then
# checks on each of the created items for the stage. This batch processing
# allows testing multiple components concurrently.
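#
# The stages below are keyed by a shared test_id, so each stage can run as a
# separate CI step. A hypothetical stage-by-stage driver, mirroring Run.test()
# further down (the Run constructor shape and the `samples` directory are
# assumptions here; the actual CLI is generated from these methods by
# onefuzz.cli.execute_api):
#
#     run = Run(onefuzz, logger)                 # constructor shape assumed
#     test_id = run.launch(samples, duration=2)  # mints and returns a test_id
#     run.check_jobs(test_id, endpoint=None, poll=True, stop_on_complete_check=True)
#     run.check_repros(test_id, endpoint=None)
#     run.cleanup(test_id, endpoint=None)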
@@ -30,7 +30,7 @@ from onefuzz.api import Command, Onefuzz
from onefuzz.backend import ContainerWrapper, wait
from onefuzz.cli import execute_api
from onefuzztypes.enums import OS, ContainerType, TaskState, VmState
from onefuzztypes.models import Job, Pool, Repro, Scaleset
from onefuzztypes.models import Job, Pool, Repro, Scaleset, Task
from onefuzztypes.primitives import Container, Directory, File, PoolName, Region
from pydantic import BaseModel, Field
@@ -39,6 +39,13 @@ WINDOWS_POOL = "linux-test"
BUILD = "0"
class TaskTestState(Enum):
not_running = "not_running"
running = "running"
stopped = "stopped"
failed = "failed"
class TemplateType(Enum):
libfuzzer = "libfuzzer"
libfuzzer_dotnet = "libfuzzer_dotnet"
@@ -54,7 +61,7 @@ class Integration(BaseModel):
inputs: Optional[str]
use_setup: bool = Field(default=False)
nested_setup_dir: Optional[str]
wait_for_files: List[ContainerType]
wait_for_files: Dict[ContainerType, int]
check_asan_log: Optional[bool] = Field(default=False)
disable_check_debugger: Optional[bool] = Field(default=False)
reboot_after_setup: Optional[bool] = Field(default=False)
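wait_for_files now maps each container type to a minimum blob count instead of merely listing the types. A minimal sketch of the completion test this enables, matching the >= comparison used in check_jobs below:

    def container_done(container: ContainerWrapper, required: int) -> bool:
        # a container checks out once it holds at least `required` blobs
        return len(container.list_blobs()) >= required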
@@ -67,14 +74,18 @@ TARGETS: Dict[str, Integration] = {
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
),
"linux-libfuzzer": Integration(
template=TemplateType.libfuzzer,
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
wait_for_files={
ContainerType.unique_reports: 1,
ContainerType.coverage: 1,
ContainerType.inputs: 2,
},
reboot_after_setup=True,
),
"linux-libfuzzer-dotnet": Integration(
@@ -84,7 +95,8 @@ TARGETS: Dict[str, Integration] = {
nested_setup_dir="my-fuzzer",
inputs="inputs",
use_setup=True,
wait_for_files=[ContainerType.inputs, ContainerType.crashes],
wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
test_repro=False,
),
"linux-libfuzzer-aarch64-crosscompile": Integration(
template=TemplateType.libfuzzer_qemu_user,
@@ -92,28 +104,28 @@ TARGETS: Dict[str, Integration] = {
target_exe="fuzz.exe",
inputs="inputs",
use_setup=True,
wait_for_files=[ContainerType.inputs, ContainerType.crashes],
wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
test_repro=False,
),
"linux-libfuzzer-rust": Integration(
template=TemplateType.libfuzzer,
os=OS.linux,
target_exe="fuzz_target_1",
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage],
wait_for_files={ContainerType.unique_reports: 1, ContainerType.coverage: 1},
),
"linux-trivial-crash": Integration(
template=TemplateType.radamsa,
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
),
"linux-trivial-crash-asan": Integration(
template=TemplateType.radamsa,
os=OS.linux,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
check_asan_log=True,
disable_check_debugger=True,
),
@@ -122,89 +134,53 @@ TARGETS: Dict[str, Integration] = {
os=OS.windows,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[
ContainerType.unique_reports,
ContainerType.coverage,
],
wait_for_files={
ContainerType.inputs: 2,
ContainerType.unique_reports: 1,
ContainerType.coverage: 1,
},
),
"windows-trivial-crash": Integration(
template=TemplateType.radamsa,
os=OS.windows,
target_exe="fuzz.exe",
inputs="seeds",
wait_for_files=[ContainerType.unique_reports],
wait_for_files={ContainerType.unique_reports: 1},
),
}
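Under the count-based schema, registering another target is a single dict entry. A hypothetical example (the target name and counts are illustrative only, not part of this change):

    "linux-libfuzzer-example": Integration(
        template=TemplateType.libfuzzer,
        os=OS.linux,
        target_exe="fuzz.exe",
        inputs="seeds",
        wait_for_files={ContainerType.unique_reports: 1, ContainerType.inputs: 2},
    ),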
class TestOnefuzz:
def __init__(
self,
onefuzz: Onefuzz,
logger: logging.Logger,
*,
pool_size: int,
os_list: List[OS],
targets: List[str],
skip_cleanup: bool,
) -> None:
def __init__(self, onefuzz: Onefuzz, logger: logging.Logger, test_id: UUID) -> None:
self.of = onefuzz
self.logger = logger
self.pools: Dict[OS, Pool] = {}
self.project = "test-" + str(uuid4()).split("-")[0]
self.pool_size = pool_size
self.os = os_list
self.targets = targets
self.skip_cleanup = skip_cleanup
# job_id -> Job
self.jobs: Dict[UUID, Job] = {}
# job_id -> List[container_url]
self.containers: Dict[UUID, List[ContainerWrapper]] = {}
# task_id -> job_id
self.tasks: Dict[UUID, UUID] = {}
self.job_os: Dict[UUID, OS] = {}
self.successful_jobs: Set[UUID] = set()
self.failed_jobs: Set[UUID] = set()
self.failed_repro: Set[UUID] = set()
# job_id -> Repro
self.repros: Dict[UUID, Repro] = {}
# job_id -> target
self.target_jobs: Dict[UUID, str] = {}
self.test_id = test_id
self.project = f"test-{self.test_id}"
def setup(
self,
*,
region: Optional[Region] = None,
user_pools: Optional[Dict[str, str]] = None,
pool_size: int,
os_list: List[OS],
) -> None:
for entry in self.os:
if user_pools and entry.name in user_pools:
self.logger.info(
"using existing pool: %s:%s", entry.name, user_pools[entry.name]
)
self.pools[entry] = self.of.pools.get(user_pools[entry.name])
else:
name = PoolName("pool-%s-%s" % (self.project, entry.name))
self.logger.info("creating pool: %s:%s", entry.name, name)
self.pools[entry] = self.of.pools.create(name, entry)
self.logger.info("creating scaleset for pool: %s", name)
self.of.scalesets.create(name, self.pool_size, region=region)
for entry in os_list:
name = PoolName(f"testpool-{entry.name}-{self.test_id}")
self.logger.info("creating pool: %s:%s", entry.name, name)
self.pools[entry] = self.of.pools.create(name, entry)
self.logger.info("creating scaleset for pool: %s", name)
self.of.scalesets.create(name, pool_size, region=region)
def launch(self, path: str) -> None:
def launch(
self, path: Directory, *, os_list: List[OS], targets: List[str], duration: int = 1
) -> None:
""" Launch all of the fuzzing templates """
for target, config in TARGETS.items():
if target not in self.targets:
if target not in targets:
continue
if config.os not in self.os:
if config.os not in os_list:
continue
self.logger.info("launching: %s", target)
@@ -230,7 +206,7 @@ class TestOnefuzz:
target_exe=target_exe,
inputs=inputs,
setup_dir=setup,
duration=1,
duration=duration,
vm_count=1,
reboot_after_setup=config.reboot_after_setup or False,
)
@@ -245,7 +221,7 @@ class TestOnefuzz:
target_harness=config.target_exe,
inputs=inputs,
setup_dir=setup,
duration=1,
duration=duration,
vm_count=1,
)
elif config.template == TemplateType.libfuzzer_qemu_user:
@@ -256,7 +232,7 @@ class TestOnefuzz:
self.pools[config.os].name,
inputs=inputs,
target_exe=target_exe,
duration=1,
duration=duration,
vm_count=1,
)
elif config.template == TemplateType.radamsa:
@@ -270,7 +246,7 @@ class TestOnefuzz:
setup_dir=setup,
check_asan_log=config.check_asan_log or False,
disable_check_debugger=config.disable_check_debugger or False,
duration=1,
duration=duration,
vm_count=1,
)
elif config.template == TemplateType.afl:
@@ -282,7 +258,7 @@ class TestOnefuzz:
target_exe=target_exe,
inputs=inputs,
setup_dir=setup,
duration=1,
duration=duration,
vm_count=1,
)
else:
@@ -291,21 +267,9 @@ class TestOnefuzz:
if not job:
raise Exception("missing job")
self.containers[job.job_id] = []
for task in self.of.tasks.list(job_id=job.job_id):
self.tasks[task.task_id] = job.job_id
self.containers[job.job_id] += [
ContainerWrapper(self.of.containers.get(x.name).sas_url)
for x in task.config.containers
if x.type in TARGETS[job.config.name].wait_for_files
]
self.jobs[job.job_id] = job
self.job_os[job.job_id] = config.os
self.target_jobs[job.job_id] = target
def check_task(self, task_id: UUID, scalesets: List[Scaleset]) -> Optional[str]:
task = self.of.tasks.get(task_id)
def check_task(
self, job: Job, task: Task, scalesets: List[Scaleset]
) -> TaskTestState:
# Check if the scaleset the task is assigned is OK
for scaleset in scalesets:
if (
@@ -313,279 +277,335 @@ class TestOnefuzz:
and scaleset.pool_name == task.config.pool.pool_name
and scaleset.state not in scaleset.state.available()
):
return "task scaleset failed: %s - %s - %s (%s)" % (
self.jobs[self.tasks[task_id]].config.name,
self.logger.error(
"task scaleset failed: %s - %s - %s (%s)",
job.config.name,
task.config.task.type.name,
scaleset.state.name,
scaleset.error,
)
return TaskTestState.failed
task = self.of.tasks.get(task.task_id)
# check if the task itself has an error
if task.error is not None:
return "task failed: %s - %s - %s (%s)" % (
task_id,
self.jobs[self.tasks[task_id]].config.name,
self.logger.error(
"task failed: %s - %s (%s)",
job.config.name,
task.config.task.type.name,
task.error,
)
return TaskTestState.failed
# just in case someone else stopped the task
if task.state in TaskState.shutting_down():
return "task shutdown early: %s - %s" % (
self.jobs[self.tasks[task_id]].config.name,
task.config.task.type.name,
)
return None
if task.state in [TaskState.stopped, TaskState.stopping]:
return TaskTestState.stopped
if task.state == TaskState.running:
return TaskTestState.running
return TaskTestState.not_running
def check_jobs(
self, poll: bool = False, stop_on_complete_check: bool = False
) -> bool:
""" Check all of the integration jobs """
jobs: Dict[UUID, Job] = {x.job_id: x for x in self.get_jobs()}
job_tasks: Dict[UUID, List[Task]] = {}
check_containers: Dict[UUID, Dict[Container, Tuple[ContainerWrapper, int]]] = {}
for job in jobs.values():
if job.config.name not in TARGETS:
self.logger.error("unknown job target: %s", job.config.name)
continue
tasks = self.of.jobs.tasks.list(job.job_id)
job_tasks[job.job_id] = tasks
check_containers[job.job_id] = {}
for task in tasks:
for container in task.config.containers:
if container.type in TARGETS[job.config.name].wait_for_files:
count = TARGETS[job.config.name].wait_for_files[container.type]
check_containers[job.job_id][container.name] = (
ContainerWrapper(
self.of.containers.get(container.name).sas_url
),
count,
)
self.success = True
self.logger.info("checking %d jobs", len(jobs))
def check_jobs_impl(
self,
) -> Tuple[bool, str, bool]:
self.cleared = False
def clear() -> None:
if not self.cleared:
self.cleared = True
print("")
if poll:
print("")
if self.jobs:
finished_job: Set[UUID] = set()
def check_jobs_impl() -> Tuple[bool, str, bool]:
self.cleared = False
failed_jobs: Set[UUID] = set()
job_task_states: Dict[UUID, Set[TaskTestState]] = {}
# check all the containers we care about for the job
for job_id in self.containers:
done: Set[ContainerWrapper] = set()
for container in self.containers[job_id]:
if len(container.list_blobs()) > 0:
for job_id in check_containers:
finished_containers: Set[Container] = set()
for (container_name, container_impl) in check_containers[
job_id
].items():
container_client, count = container_impl
if len(container_client.list_blobs()) >= count:
clear()
self.logger.info(
"new files in: %s", container.client.container_name
"found files for %s - %s",
jobs[job_id].config.name,
container_name,
)
done.add(container)
for container in done:
self.containers[job_id].remove(container)
if not self.containers[job_id]:
clear()
self.logger.info("finished: %s", self.jobs[job_id].config.name)
finished_job.add(job_id)
finished_containers.add(container_name)
# check all the tasks associated with the job
if self.tasks:
scalesets = self.of.scalesets.list()
for task_id in self.tasks:
error = self.check_task(task_id, scalesets)
if error is not None:
for container_name in finished_containers:
del check_containers[job_id][container_name]
scalesets = self.of.scalesets.list()
for job_id in job_tasks:
finished_tasks: Set[UUID] = set()
job_task_states[job_id] = set()
for task in job_tasks[job_id]:
if job_id not in jobs:
continue
task_result = self.check_task(jobs[job_id], task, scalesets)
if task_result == TaskTestState.failed:
self.success = False
failed_jobs.add(job_id)
elif task_result == TaskTestState.stopped:
finished_tasks.add(task.task_id)
else:
job_task_states[job_id].add(task_result)
job_tasks[job_id] = [
x for x in job_tasks[job_id] if x.task_id not in finished_tasks
]
to_remove: Set[UUID] = set()
for job in jobs.values():
# stop tracking failed jobs
if job.job_id in failed_jobs:
if job.job_id in check_containers:
del check_containers[job.job_id]
if job.job_id in job_tasks:
del job_tasks[job.job_id]
continue
# stop checking containers once all the containers for the job
# have checked out.
if job.job_id in check_containers:
if not check_containers[job.job_id]:
clear()
self.logger.error(error)
finished_job.add(self.tasks[task_id])
self.failed_jobs.add(self.tasks[task_id])
self.logger.info(
"found files in all containers for %s", job.config.name
)
del check_containers[job.job_id]
# cleanup jobs that are done testing
for job_id in finished_job:
self.stop_template(
self.jobs[job_id].config.name, delete_containers=False
)
if job.job_id not in check_containers:
if job.job_id in job_task_states:
if set([TaskTestState.running]).issuperset(
job_task_states[job.job_id]
):
del job_tasks[job.job_id]
for task_id, task_job_id in list(self.tasks.items()):
if job_id == task_job_id:
del self.tasks[task_id]
if job.job_id not in job_tasks and job.job_id not in check_containers:
clear()
self.logger.info("%s completed", job.config.name)
to_remove.add(job.job_id)
if job_id in self.jobs:
self.successful_jobs.add(job_id)
del self.jobs[job_id]
for job_id in to_remove:
if stop_on_complete_check:
self.stop_job(jobs[job_id])
del jobs[job_id]
if job_id in self.containers:
del self.containers[job_id]
msg = "waiting on: %s" % ",".join(
sorted(x.config.name for x in jobs.values())
)
if poll and len(msg) > 80:
msg = "waiting on %d jobs" % len(jobs)
msg = "waiting on: %s" % ",".join(
sorted(x.config.name for x in self.jobs.values())
)
if len(msg) > 80:
msg = "waiting on %d jobs" % len(self.jobs)
if not jobs:
msg = "done all tasks"
return (
not bool(self.jobs),
msg,
not bool(self.failed_jobs),
)
return (not bool(jobs), msg, self.success)
def check_jobs(self) -> bool:
""" Check all of the integration jobs """
self.logger.info("checking jobs")
return wait(self.check_jobs_impl)
def get_job_crash(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
# get the crash container for a given job
if poll:
return wait(check_jobs_impl)
else:
_, msg, result = check_jobs_impl()
self.logger.info(msg)
return result
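check_jobs_impl is written against the polling contract of onefuzz.backend.wait as consumed here: the callable returns a (done, message, success) tuple, and wait loops until done is truthy, then reports success. A hypothetical stand-in capturing that assumed behavior (the poll interval is an assumption):

    from typing import Callable, Tuple
    import time

    def wait_sketch(func: Callable[[], Tuple[bool, str, bool]]) -> bool:
        while True:
            done, msg, success = func()
            if done:
                return success
            print(msg, end="\r")
            time.sleep(10)  # assumed poll interval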
def get_job_crash_report(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
for task in self.of.tasks.list(job_id=job_id, state=None):
for container in task.config.containers:
if container.type != ContainerType.unique_reports:
if container.type not in [
ContainerType.unique_reports,
ContainerType.reports,
]:
continue
files = self.of.containers.files.list(container.name)
if len(files.files) > 0:
return (container.name, files.files[0])
return None
def launch_repro(self) -> None:
def launch_repro(self) -> Tuple[bool, Dict[UUID, Tuple[Job, Repro]]]:
# launch repro for one report from all successful jobs
has_cdb = bool(which("cdb.exe"))
has_gdb = bool(which("gdb"))
for job_id in self.successful_jobs:
if not TARGETS[self.target_jobs[job_id]].test_repro:
self.logger.info("skipping repro for %s", self.target_jobs[job_id])
jobs = self.get_jobs()
result = True
repros = {}
for job in jobs:
if not TARGETS[job.config.name].test_repro:
self.logger.info("not testing repro for %s", job.config.name)
continue
if self.job_os[job_id] == OS.linux and not has_gdb:
if TARGETS[job.config.name].os == OS.linux and not has_gdb:
self.logger.warning(
"missing gdb in path, not launching repro: %s",
self.target_jobs[job_id],
"skipping repro for %s, missing gdb", job.config.name
)
continue
if self.job_os[job_id] == OS.windows and not has_cdb:
if TARGETS[job.config.name].os == OS.windows and not has_cdb:
self.logger.warning(
"missing cdb in path, not launching repro: %s",
self.target_jobs[job_id],
"skipping repro for %s, missing cdb", job.config.name
)
continue
self.logger.info("launching repro: %s", self.target_jobs[job_id])
report = self.get_job_crash(job_id)
report = self.get_job_crash_report(job.job_id)
if report is None:
self.logger.warning(
"target does not include crash reports: %s",
self.target_jobs[job_id],
)
return
(container, path) = report
self.repros[job_id] = self.of.repro.create(container, path, duration=1)
def check_repro_impl(
self,
) -> Tuple[bool, str, bool]:
# check all of the launched repros
self.cleared = False
def clear() -> None:
if not self.cleared:
self.cleared = True
print("")
commands: Dict[OS, Tuple[str, str]] = {
OS.windows: ("r rip", r"^rip=[a-f0-9]{16}"),
OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
}
info: Dict[str, List[str]] = {}
done: Set[UUID] = set()
for job_id, repro in self.repros.items():
repro = self.of.repro.get(repro.vm_id)
if repro.error:
clear()
self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], repro.error
"target does not include crash reports: %s", job.config.name
)
self.failed_jobs.add(job_id)
done.add(job_id)
elif repro.state not in [VmState.init, VmState.extensions_launch]:
done.add(job_id)
try:
result = self.of.repro.connect(
repro.vm_id,
delete_after_use=True,
debug_command=commands[repro.os][0],
)
if result is not None and re.search(
commands[repro.os][1], result, re.MULTILINE
):
clear()
self.logger.info(
"repro succeeded: %s", self.target_jobs[job_id]
)
self.failed_jobs.add(job_id)
done.add(job_id)
else:
clear()
self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], result
)
self.failed_jobs.add(job_id)
done.add(job_id)
except Exception as e:
result = False
else:
self.logger.info("launching repro: %s", job.config.name)
(container, path) = report
repro = self.of.repro.create(container, path, duration=1)
repros[job.job_id] = (job, repro)
return (result, repros)
def check_repro(self, repros: Dict[UUID, Tuple[Job, Repro]]) -> bool:
self.logger.info("checking repros")
self.success = True
def check_repro_impl() -> Tuple[bool, str, bool]:
# check all of the launched repros
self.cleared = False
def clear() -> None:
if not self.cleared:
self.cleared = True
print("")
commands: Dict[OS, Tuple[str, str]] = {
OS.windows: ("r rip", r"^rip=[a-f0-9]{16}"),
OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
}
for (job, repro) in list(repros.values()):
repros[job.job_id] = (job, self.of.repro.get(repro.vm_id))
for (job, repro) in list(repros.values()):
if repro.error:
clear()
self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], repr(e)
"repro failed: %s: %s",
job.config.name,
repro.error,
)
self.failed_jobs.add(job_id)
done.add(job_id)
else:
if repro.state.name not in info:
info[repro.state.name] = []
info[repro.state.name].append(self.target_jobs[job_id])
self.of.repro.delete(repro.vm_id)
del repros[job.job_id]
elif repro.state == VmState.running:
try:
result = self.of.repro.connect(
repro.vm_id,
delete_after_use=True,
debug_command=commands[repro.os][0],
)
if result is not None and re.search(
commands[repro.os][1], result, re.MULTILINE
):
clear()
self.logger.info("repro succeeded: %s", job.config.name)
else:
clear()
self.logger.error(
"repro failed: %s - %s", job.config.name, result
)
except Exception as err:
clear()
self.logger.error("repro failed: %s - %s", job.config.name, err)
del repros[job.job_id]
elif repro.state not in [VmState.init, VmState.extensions_launch]:
self.logger.error(
"repro failed: %s - bad state: %s", job.config.name, repro.state
)
del repros[job.job_id]
for job_id in done:
self.of.repro.delete(self.repros[job_id].vm_id)
del self.repros[job_id]
repro_states: Dict[str, List[str]] = {}
for (job, repro) in repros.values():
if repro.state.name not in repro_states:
repro_states[repro.state.name] = []
repro_states[repro.state.name].append(job.config.name)
logline = []
for name in info:
logline.append("%s:%s" % (name, ",".join(info[name])))
logline = []
for state in repro_states:
logline.append("%s:%s" % (state, ",".join(repro_states[state])))
msg = "waiting repro: %s" % " ".join(logline)
if len(logline) > 80:
msg = "waiting on %d repros" % len(self.repros)
msg = "waiting repro: %s" % " ".join(logline)
if len(msg) > 80:
msg = "waiting on %d repros" % len(repros)
return (not bool(repros), msg, self.success)
return (
not bool(self.repros),
msg,
bool(self.failed_jobs),
return wait(check_repro_impl)
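The repro check proves the debugger attached at the crash by printing the instruction pointer and matching the output against the OS-specific patterns above. A self-contained sketch of that verification (the sample output is fabricated for illustration):

    import re

    sample = "rip=00007ff6d4a1b2c3"  # hypothetical cdb `r rip` output
    assert re.search(r"^rip=[a-f0-9]{16}", sample, re.MULTILINE)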
def get_jobs(self) -> List[Job]:
jobs = self.of.jobs.list(job_state=None)
jobs = [x for x in jobs if x.config.project == self.project]
return jobs
def stop_job(self, job: Job, delete_containers: bool = False) -> None:
self.of.template.stop(
job.config.project,
job.config.name,
BUILD,
delete_containers=delete_containers,
)
def check_repro(self) -> bool:
self.logger.info("checking repros")
return wait(self.check_repro_impl)
def get_pools(self) -> List[Pool]:
pools = self.of.pools.list()
pools = [x for x in pools if x.name == f"testpool-{x.os.name}-{self.test_id}"]
return pools
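Because jobs and pools are named after the test_id, a later invocation in a fresh process can rediscover everything without shared in-memory state; a brief sketch:

    tester = TestOnefuzz(onefuzz, logger, test_id)  # fresh process, same test_id
    jobs = tester.get_jobs()    # filtered on project == f"test-{test_id}"
    pools = tester.get_pools()  # filtered on f"testpool-{os}-{test_id}"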
def stop_template(self, target: str, delete_containers: bool = True) -> None:
""" stop a specific template """
if self.skip_cleanup:
self.logger.warning("not cleaning up target: %s", target)
else:
self.of.template.stop(
self.project,
target,
BUILD,
delete_containers=delete_containers,
stop_notifications=True,
)
def cleanup(self, *, user_pools: Optional[Dict[str, str]] = None) -> bool:
def cleanup(self) -> None:
""" cleanup all of the integration pools & jobs """
if self.skip_cleanup:
self.logger.warning("not cleaning up")
return True
self.logger.info("cleaning up")
errors: List[Exception] = []
for target, config in TARGETS.items():
if config.os not in self.os:
continue
if target not in self.targets:
continue
jobs = self.get_jobs()
for job in jobs:
try:
self.logger.info("stopping %s", target)
self.stop_template(target, delete_containers=False)
self.stop_job(job, delete_containers=True)
except Exception as e:
self.logger.error("cleanup of %s failed", target)
self.logger.error("cleanup of job failed: %s - %s", job, e)
errors.append(e)
for pool in self.pools.values():
if user_pools and pool.name in user_pools.values():
continue
for pool in self.get_pools():
self.logger.info(
"halting: %s:%s:%s", pool.name, pool.os.name, pool.arch.name
)
@@ -595,52 +615,115 @@ class TestOnefuzz:
self.logger.error("cleanup of pool failed: %s - %s", pool.name, e)
errors.append(e)
for repro in self.repros.values():
try:
self.of.repro.delete(repro.vm_id)
except Exception as e:
self.logger.error("cleanup of repro failed: %s - %s", repro.vm_id, e)
errors.append(e)
container_names = set()
for job in jobs:
for task in self.of.tasks.list(job_id=job.job_id, state=None):
for container in task.config.containers:
if container.type in [
ContainerType.reports,
ContainerType.unique_reports,
]:
container_names.add(container.name)
return not bool(errors)
for repro in self.of.repro.list():
if repro.config.container in container_names:
try:
self.of.repro.delete(repro.vm_id)
except Exception as e:
self.logger.error("cleanup of repro failed: %s %s", repro.vm_id, e)
errors.append(e)
if errors:
raise Exception("cleanup failed")
class Run(Command):
def check_jobs(
self,
test_id: UUID,
*,
endpoint: Optional[str],
poll: bool = False,
stop_on_complete_check: bool = False,
) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
result = tester.check_jobs(
poll=poll, stop_on_complete_check=stop_on_complete_check
)
if not result:
raise Exception("jobs failed")
def check_repros(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
launch_result, repros = tester.launch_repro()
result = tester.check_repro(repros)
if not (result and launch_result):
raise Exception("repros failed")
def launch(
self,
samples: Directory,
*,
endpoint: Optional[str] = None,
pool_size: int = 10,
region: Optional[Region] = None,
os_list: List[OS] = [OS.linux, OS.windows],
targets: List[str] = list(TARGETS.keys()),
test_id: Optional[UUID] = None,
duration: int = 1,
) -> UUID:
if test_id is None:
test_id = uuid4()
self.logger.info("launching test_id: %s", test_id)
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
tester.setup(region=region, pool_size=pool_size, os_list=os_list)
tester.launch(samples, os_list=os_list, targets=targets, duration=duration)
return test_id
def cleanup(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id=test_id)
tester.cleanup()
def test(
self,
samples: Directory,
*,
endpoint: Optional[str] = None,
user_pools: Optional[Dict[str, str]] = None,
pool_size: int = 10,
pool_size: int = 15,
region: Optional[Region] = None,
os_list: List[OS] = [OS.linux, OS.windows],
targets: List[str] = list(TARGETS.keys()),
skip_repro: bool = False,
skip_cleanup: bool = False,
duration: int = 1,
) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(
self.onefuzz,
self.logger,
pool_size=pool_size,
os_list=os_list,
targets=targets,
skip_cleanup=skip_cleanup,
)
success = True
test_id = uuid4()
error: Optional[Exception] = None
try:
tester.setup(region=region, user_pools=user_pools)
tester.launch(samples)
tester.check_jobs()
self.launch(
samples,
endpoint=endpoint,
pool_size=pool_size,
region=region,
os_list=os_list,
targets=targets,
test_id=test_id,
duration=duration,
)
self.check_jobs(
test_id, endpoint=endpoint, poll=True, stop_on_complete_check=True
)
if skip_repro:
self.logger.warning("not testing crash repro")
else:
self.logger.info("launching crash repro tests")
tester.launch_repro()
tester.check_repro()
self.check_repros(test_id, endpoint=endpoint)
except Exception as e:
self.logger.error("testing failed: %s", repr(e))
error = e
@@ -649,10 +732,11 @@ class Run(Command):
self.logger.error("interrupted testing")
success = False
if not tester.cleanup(user_pools=user_pools):
success = False
if tester.failed_jobs or tester.failed_repro:
try:
self.cleanup(test_id, endpoint=endpoint)
except Exception as e:
self.logger.error("testing failed: %s", repr(e))
error = e
success = False
if error: