enable long-running integration tests (#654)

This commit is contained in:
bmc-msft
2021-03-10 17:03:15 -05:00
committed by GitHub
parent f055e73b13
commit f6a426cc07

View File

@ -7,13 +7,13 @@
""" Launch multiple templates using samples to verify Onefuzz works end-to-end """ """ Launch multiple templates using samples to verify Onefuzz works end-to-end """
# NOTE: # NOTE:
# 1. This script uses pre-built fuzzing samples from the onefuzz-samples project. # 1. This script uses an unpacked version of the `integration-test-results`
# https://github.com/microsoft/onefuzz-samples/releases/latest # from the CI pipeline.
# #
# 2. This script will create new pools & managed scalesets during the testing by # Check out https://github.com/microsoft/onefuzz/actions/workflows/
# default. To use pre-existing pools, specify `--user_pools os=pool_name` # ci.yml?query=branch%3Amain+is%3Asuccess
# #
# 3. For each stage, this script launches everything for the stage in batch, then # 2. For each stage, this script launches everything for the stage in batch, then
# checks on each of the created items for the stage. This batch processing # checks on each of the created items for the stage. This batch processing
# allows testing multiple components concurrently. # allows testing multiple components concurrently.
@ -30,7 +30,7 @@ from onefuzz.api import Command, Onefuzz
from onefuzz.backend import ContainerWrapper, wait from onefuzz.backend import ContainerWrapper, wait
from onefuzz.cli import execute_api from onefuzz.cli import execute_api
from onefuzztypes.enums import OS, ContainerType, TaskState, VmState from onefuzztypes.enums import OS, ContainerType, TaskState, VmState
from onefuzztypes.models import Job, Pool, Repro, Scaleset from onefuzztypes.models import Job, Pool, Repro, Scaleset, Task
from onefuzztypes.primitives import Container, Directory, File, PoolName, Region from onefuzztypes.primitives import Container, Directory, File, PoolName, Region
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@ -39,6 +39,13 @@ WINDOWS_POOL = "linux-test"
BUILD = "0" BUILD = "0"
class TaskTestState(Enum):
not_running = "not_running"
running = "running"
stopped = "stopped"
failed = "failed"
class TemplateType(Enum): class TemplateType(Enum):
libfuzzer = "libfuzzer" libfuzzer = "libfuzzer"
libfuzzer_dotnet = "libfuzzer_dotnet" libfuzzer_dotnet = "libfuzzer_dotnet"
@ -54,7 +61,7 @@ class Integration(BaseModel):
inputs: Optional[str] inputs: Optional[str]
use_setup: bool = Field(default=False) use_setup: bool = Field(default=False)
nested_setup_dir: Optional[str] nested_setup_dir: Optional[str]
wait_for_files: List[ContainerType] wait_for_files: Dict[ContainerType, int]
check_asan_log: Optional[bool] = Field(default=False) check_asan_log: Optional[bool] = Field(default=False)
disable_check_debugger: Optional[bool] = Field(default=False) disable_check_debugger: Optional[bool] = Field(default=False)
reboot_after_setup: Optional[bool] = Field(default=False) reboot_after_setup: Optional[bool] = Field(default=False)
@ -67,14 +74,18 @@ TARGETS: Dict[str, Integration] = {
os=OS.linux, os=OS.linux,
target_exe="fuzz.exe", target_exe="fuzz.exe",
inputs="seeds", inputs="seeds",
wait_for_files=[ContainerType.unique_reports], wait_for_files={ContainerType.unique_reports: 1},
), ),
"linux-libfuzzer": Integration( "linux-libfuzzer": Integration(
template=TemplateType.libfuzzer, template=TemplateType.libfuzzer,
os=OS.linux, os=OS.linux,
target_exe="fuzz.exe", target_exe="fuzz.exe",
inputs="seeds", inputs="seeds",
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage], wait_for_files={
ContainerType.unique_reports: 1,
ContainerType.coverage: 1,
ContainerType.inputs: 2,
},
reboot_after_setup=True, reboot_after_setup=True,
), ),
"linux-libfuzzer-dotnet": Integration( "linux-libfuzzer-dotnet": Integration(
@ -84,7 +95,8 @@ TARGETS: Dict[str, Integration] = {
nested_setup_dir="my-fuzzer", nested_setup_dir="my-fuzzer",
inputs="inputs", inputs="inputs",
use_setup=True, use_setup=True,
wait_for_files=[ContainerType.inputs, ContainerType.crashes], wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
test_repro=False,
), ),
"linux-libfuzzer-aarch64-crosscompile": Integration( "linux-libfuzzer-aarch64-crosscompile": Integration(
template=TemplateType.libfuzzer_qemu_user, template=TemplateType.libfuzzer_qemu_user,
@ -92,28 +104,28 @@ TARGETS: Dict[str, Integration] = {
target_exe="fuzz.exe", target_exe="fuzz.exe",
inputs="inputs", inputs="inputs",
use_setup=True, use_setup=True,
wait_for_files=[ContainerType.inputs, ContainerType.crashes], wait_for_files={ContainerType.inputs: 2, ContainerType.crashes: 1},
test_repro=False, test_repro=False,
), ),
"linux-libfuzzer-rust": Integration( "linux-libfuzzer-rust": Integration(
template=TemplateType.libfuzzer, template=TemplateType.libfuzzer,
os=OS.linux, os=OS.linux,
target_exe="fuzz_target_1", target_exe="fuzz_target_1",
wait_for_files=[ContainerType.unique_reports, ContainerType.coverage], wait_for_files={ContainerType.unique_reports: 1, ContainerType.coverage: 1},
), ),
"linux-trivial-crash": Integration( "linux-trivial-crash": Integration(
template=TemplateType.radamsa, template=TemplateType.radamsa,
os=OS.linux, os=OS.linux,
target_exe="fuzz.exe", target_exe="fuzz.exe",
inputs="seeds", inputs="seeds",
wait_for_files=[ContainerType.unique_reports], wait_for_files={ContainerType.unique_reports: 1},
), ),
"linux-trivial-crash-asan": Integration( "linux-trivial-crash-asan": Integration(
template=TemplateType.radamsa, template=TemplateType.radamsa,
os=OS.linux, os=OS.linux,
target_exe="fuzz.exe", target_exe="fuzz.exe",
inputs="seeds", inputs="seeds",
wait_for_files=[ContainerType.unique_reports], wait_for_files={ContainerType.unique_reports: 1},
check_asan_log=True, check_asan_log=True,
disable_check_debugger=True, disable_check_debugger=True,
), ),
@ -122,89 +134,53 @@ TARGETS: Dict[str, Integration] = {
os=OS.windows, os=OS.windows,
target_exe="fuzz.exe", target_exe="fuzz.exe",
inputs="seeds", inputs="seeds",
wait_for_files=[ wait_for_files={
ContainerType.unique_reports, ContainerType.inputs: 2,
ContainerType.coverage, ContainerType.unique_reports: 1,
], ContainerType.coverage: 1,
},
), ),
"windows-trivial-crash": Integration( "windows-trivial-crash": Integration(
template=TemplateType.radamsa, template=TemplateType.radamsa,
os=OS.windows, os=OS.windows,
target_exe="fuzz.exe", target_exe="fuzz.exe",
inputs="seeds", inputs="seeds",
wait_for_files=[ContainerType.unique_reports], wait_for_files={ContainerType.unique_reports: 1},
), ),
} }
class TestOnefuzz: class TestOnefuzz:
def __init__( def __init__(self, onefuzz: Onefuzz, logger: logging.Logger, test_id: UUID) -> None:
self,
onefuzz: Onefuzz,
logger: logging.Logger,
*,
pool_size: int,
os_list: List[OS],
targets: List[str],
skip_cleanup: bool,
) -> None:
self.of = onefuzz self.of = onefuzz
self.logger = logger self.logger = logger
self.pools: Dict[OS, Pool] = {} self.pools: Dict[OS, Pool] = {}
self.project = "test-" + str(uuid4()).split("-")[0] self.test_id = test_id
self.pool_size = pool_size self.project = f"test-{self.test_id}"
self.os = os_list
self.targets = targets
self.skip_cleanup = skip_cleanup
# job_id -> Job
self.jobs: Dict[UUID, Job] = {}
# job_id -> List[container_url]
self.containers: Dict[UUID, List[ContainerWrapper]] = {}
# task_id -> job_id
self.tasks: Dict[UUID, UUID] = {}
self.job_os: Dict[UUID, OS] = {}
self.successful_jobs: Set[UUID] = set()
self.failed_jobs: Set[UUID] = set()
self.failed_repro: Set[UUID] = set()
# job_id -> Repro
self.repros: Dict[UUID, Repro] = {}
# job_id -> target
self.target_jobs: Dict[UUID, str] = {}
def setup( def setup(
self, self,
*, *,
region: Optional[Region] = None, region: Optional[Region] = None,
user_pools: Optional[Dict[str, str]] = None, pool_size: int,
os_list: List[OS],
) -> None: ) -> None:
for entry in self.os: for entry in os_list:
if user_pools and entry.name in user_pools: name = PoolName(f"testpool-{entry.name}-{self.test_id}")
self.logger.info(
"using existing pool: %s:%s", entry.name, user_pools[entry.name]
)
self.pools[entry] = self.of.pools.get(user_pools[entry.name])
else:
name = PoolName("pool-%s-%s" % (self.project, entry.name))
self.logger.info("creating pool: %s:%s", entry.name, name) self.logger.info("creating pool: %s:%s", entry.name, name)
self.pools[entry] = self.of.pools.create(name, entry) self.pools[entry] = self.of.pools.create(name, entry)
self.logger.info("creating scaleset for pool: %s", name) self.logger.info("creating scaleset for pool: %s", name)
self.of.scalesets.create(name, self.pool_size, region=region) self.of.scalesets.create(name, pool_size, region=region)
def launch(self, path: str) -> None: def launch(
self, path: Directory, *, os_list: List[OS], targets: List[str], duration=int
) -> None:
""" Launch all of the fuzzing templates """ """ Launch all of the fuzzing templates """
for target, config in TARGETS.items(): for target, config in TARGETS.items():
if target not in self.targets: if target not in targets:
continue continue
if config.os not in self.os: if config.os not in os_list:
continue continue
self.logger.info("launching: %s", target) self.logger.info("launching: %s", target)
@ -230,7 +206,7 @@ class TestOnefuzz:
target_exe=target_exe, target_exe=target_exe,
inputs=inputs, inputs=inputs,
setup_dir=setup, setup_dir=setup,
duration=1, duration=duration,
vm_count=1, vm_count=1,
reboot_after_setup=config.reboot_after_setup or False, reboot_after_setup=config.reboot_after_setup or False,
) )
@ -245,7 +221,7 @@ class TestOnefuzz:
target_harness=config.target_exe, target_harness=config.target_exe,
inputs=inputs, inputs=inputs,
setup_dir=setup, setup_dir=setup,
duration=1, duration=duration,
vm_count=1, vm_count=1,
) )
elif config.template == TemplateType.libfuzzer_qemu_user: elif config.template == TemplateType.libfuzzer_qemu_user:
@ -256,7 +232,7 @@ class TestOnefuzz:
self.pools[config.os].name, self.pools[config.os].name,
inputs=inputs, inputs=inputs,
target_exe=target_exe, target_exe=target_exe,
duration=1, duration=duration,
vm_count=1, vm_count=1,
) )
elif config.template == TemplateType.radamsa: elif config.template == TemplateType.radamsa:
@ -270,7 +246,7 @@ class TestOnefuzz:
setup_dir=setup, setup_dir=setup,
check_asan_log=config.check_asan_log or False, check_asan_log=config.check_asan_log or False,
disable_check_debugger=config.disable_check_debugger or False, disable_check_debugger=config.disable_check_debugger or False,
duration=1, duration=duration,
vm_count=1, vm_count=1,
) )
elif config.template == TemplateType.afl: elif config.template == TemplateType.afl:
@ -282,7 +258,7 @@ class TestOnefuzz:
target_exe=target_exe, target_exe=target_exe,
inputs=inputs, inputs=inputs,
setup_dir=setup, setup_dir=setup,
duration=1, duration=duration,
vm_count=1, vm_count=1,
) )
else: else:
@ -291,21 +267,9 @@ class TestOnefuzz:
if not job: if not job:
raise Exception("missing job") raise Exception("missing job")
self.containers[job.job_id] = [] def check_task(
for task in self.of.tasks.list(job_id=job.job_id): self, job: Job, task: Task, scalesets: List[Scaleset]
self.tasks[task.task_id] = job.job_id ) -> TaskTestState:
self.containers[job.job_id] += [
ContainerWrapper(self.of.containers.get(x.name).sas_url)
for x in task.config.containers
if x.type in TARGETS[job.config.name].wait_for_files
]
self.jobs[job.job_id] = job
self.job_os[job.job_id] = config.os
self.target_jobs[job.job_id] = target
def check_task(self, task_id: UUID, scalesets: List[Scaleset]) -> Optional[str]:
task = self.of.tasks.get(task_id)
# Check if the scaleset the task is assigned is OK # Check if the scaleset the task is assigned is OK
for scaleset in scalesets: for scaleset in scalesets:
if ( if (
@ -313,154 +277,231 @@ class TestOnefuzz:
and scaleset.pool_name == task.config.pool.pool_name and scaleset.pool_name == task.config.pool.pool_name
and scaleset.state not in scaleset.state.available() and scaleset.state not in scaleset.state.available()
): ):
return "task scaleset failed: %s - %s - %s (%s)" % ( self.logger.error(
self.jobs[self.tasks[task_id]].config.name, "task scaleset failed: %s - %s - %s (%s)",
job.config.name,
task.config.task.type.name, task.config.task.type.name,
scaleset.state.name, scaleset.state.name,
scaleset.error, scaleset.error,
) )
return TaskTestState.failed
task = self.of.tasks.get(task.task_id)
# check if the task itself has an error # check if the task itself has an error
if task.error is not None: if task.error is not None:
return "task failed: %s - %s - %s (%s)" % ( self.logger.error(
task_id, "task failed: %s - %s (%s)",
self.jobs[self.tasks[task_id]].config.name, job.config.name,
task.config.task.type.name, task.config.task.type.name,
task.error, task.error,
) )
return TaskTestState.failed
# just in case someone else stopped the task if task.state in [TaskState.stopped, TaskState.stopping]:
if task.state in TaskState.shutting_down(): return TaskTestState.stopped
return "task shutdown early: %s - %s" % (
self.jobs[self.tasks[task_id]].config.name, if task.state == TaskState.running:
task.config.task.type.name, return TaskTestState.running
return TaskTestState.not_running
def check_jobs(
self, poll: bool = False, stop_on_complete_check: bool = False
) -> bool:
""" Check all of the integration jobs """
jobs: Dict[UUID, Job] = {x.job_id: x for x in self.get_jobs()}
job_tasks: Dict[UUID, List[Task]] = {}
check_containers: Dict[UUID, Dict[Container, Tuple[ContainerWrapper, int]]] = {}
for job in jobs.values():
if job.config.name not in TARGETS:
self.logger.error("unknown job target: %s", job.config.name)
continue
tasks = self.of.jobs.tasks.list(job.job_id)
job_tasks[job.job_id] = tasks
check_containers[job.job_id] = {}
for task in tasks:
for container in task.config.containers:
if container.type in TARGETS[job.config.name].wait_for_files:
count = TARGETS[job.config.name].wait_for_files[container.type]
check_containers[job.job_id][container.name] = (
ContainerWrapper(
self.of.containers.get(container.name).sas_url
),
count,
) )
return None
def check_jobs_impl( self.success = True
self, self.logger.info("checking %d jobs", len(jobs))
) -> Tuple[bool, str, bool]:
self.cleared = False self.cleared = False
def clear() -> None: def clear() -> None:
if not self.cleared: if not self.cleared:
self.cleared = True self.cleared = True
if poll:
print("") print("")
if self.jobs: def check_jobs_impl() -> Tuple[bool, str, bool]:
finished_job: Set[UUID] = set() self.cleared = False
failed_jobs: Set[UUID] = set()
job_task_states: Dict[UUID, Set[TaskTestState]] = {}
# check all the containers we care about for the job for job_id in check_containers:
for job_id in self.containers: finished_containers: Set[Container] = set()
done: Set[ContainerWrapper] = set() for (container_name, container_impl) in check_containers[
for container in self.containers[job_id]: job_id
if len(container.list_blobs()) > 0: ].items():
container_client, count = container_impl
if len(container_client.list_blobs()) >= count:
clear() clear()
self.logger.info( self.logger.info(
"new files in: %s", container.client.container_name "found files for %s - %s",
jobs[job_id].config.name,
container_name,
) )
done.add(container) finished_containers.add(container_name)
for container in done:
self.containers[job_id].remove(container) for container_name in finished_containers:
if not self.containers[job_id]: del check_containers[job_id][container_name]
clear()
self.logger.info("finished: %s", self.jobs[job_id].config.name)
finished_job.add(job_id)
# check all the tasks associated with the job
if self.tasks:
scalesets = self.of.scalesets.list() scalesets = self.of.scalesets.list()
for task_id in self.tasks: for job_id in job_tasks:
error = self.check_task(task_id, scalesets) finished_tasks: Set[UUID] = set()
if error is not None: job_task_states[job_id] = set()
for task in job_tasks[job_id]:
if job_id not in jobs:
continue
task_result = self.check_task(jobs[job_id], task, scalesets)
if task_result == TaskTestState.failed:
self.success = False
failed_jobs.add(job_id)
elif task_result == TaskTestState.stopped:
finished_tasks.add(task.task_id)
else:
job_task_states[job_id].add(task_result)
job_tasks[job_id] = [
x for x in job_tasks[job_id] if x.task_id not in finished_tasks
]
to_remove: Set[UUID] = set()
for job in jobs.values():
# stop tracking failed jobs
if job.job_id in failed_jobs:
if job.job_id in check_containers:
del check_containers[job.job_id]
if job.job_id in job_tasks:
del job_tasks[job.job_id]
continue
# stop checking containers once all the containers for the job
# have checked out.
if job.job_id in check_containers:
if not check_containers[job.job_id]:
clear() clear()
self.logger.error(error) self.logger.info(
finished_job.add(self.tasks[task_id]) "found files in all containers for %s", job.config.name
self.failed_jobs.add(self.tasks[task_id])
# cleanup jobs that are done testing
for job_id in finished_job:
self.stop_template(
self.jobs[job_id].config.name, delete_containers=False
) )
del check_containers[job.job_id]
for task_id, task_job_id in list(self.tasks.items()): if job.job_id not in check_containers:
if job_id == task_job_id: if job.job_id in job_task_states:
del self.tasks[task_id] if set([TaskTestState.running]).issuperset(
job_task_states[job.job_id]
):
del job_tasks[job.job_id]
if job_id in self.jobs: if job.job_id not in job_tasks and job.job_id not in check_containers:
self.successful_jobs.add(job_id) clear()
del self.jobs[job_id] self.logger.info("%s completed", job.config.name)
to_remove.add(job.job_id)
if job_id in self.containers: for job_id in to_remove:
del self.containers[job_id] if stop_on_complete_check:
self.stop_job(jobs[job_id])
del jobs[job_id]
msg = "waiting on: %s" % ",".join( msg = "waiting on: %s" % ",".join(
sorted(x.config.name for x in self.jobs.values()) sorted(x.config.name for x in jobs.values())
) )
if len(msg) > 80: if poll and len(msg) > 80:
msg = "waiting on %d jobs" % len(self.jobs) msg = "waiting on %d jobs" % len(jobs)
return ( if not jobs:
not bool(self.jobs), msg = "done all tasks"
msg,
not bool(self.failed_jobs),
)
def check_jobs(self) -> bool: return (not bool(jobs), msg, self.success)
""" Check all of the integration jobs """
self.logger.info("checking jobs")
return wait(self.check_jobs_impl)
def get_job_crash(self, job_id: UUID) -> Optional[Tuple[Container, str]]: if poll:
# get the crash container for a given job return wait(check_jobs_impl)
else:
_, msg, result = check_jobs_impl()
self.logger.info(msg)
return result
def get_job_crash_report(self, job_id: UUID) -> Optional[Tuple[Container, str]]:
for task in self.of.tasks.list(job_id=job_id, state=None): for task in self.of.tasks.list(job_id=job_id, state=None):
for container in task.config.containers: for container in task.config.containers:
if container.type != ContainerType.unique_reports: if container.type not in [
ContainerType.unique_reports,
ContainerType.reports,
]:
continue continue
files = self.of.containers.files.list(container.name) files = self.of.containers.files.list(container.name)
if len(files.files) > 0: if len(files.files) > 0:
return (container.name, files.files[0]) return (container.name, files.files[0])
return None return None
def launch_repro(self) -> None: def launch_repro(self) -> Tuple[bool, Dict[UUID, Tuple[Job, Repro]]]:
# launch repro for one report from all successful jobs # launch repro for one report from all successful jobs
has_cdb = bool(which("cdb.exe")) has_cdb = bool(which("cdb.exe"))
has_gdb = bool(which("gdb")) has_gdb = bool(which("gdb"))
for job_id in self.successful_jobs:
if not TARGETS[self.target_jobs[job_id]].test_repro: jobs = self.get_jobs()
self.logger.info("skipping repro for %s", self.target_jobs[job_id])
result = True
repros = {}
for job in jobs:
if not TARGETS[job.config.name].test_repro:
self.logger.info("not testing repro for %s", job.config.name)
continue continue
if self.job_os[job_id] == OS.linux and not has_gdb: if TARGETS[job.config.name].os == OS.linux and not has_gdb:
self.logger.warning( self.logger.warning(
"missing gdb in path, not launching repro: %s", "skipping repro for %s, missing gdb", job.config.name
self.target_jobs[job_id],
) )
continue continue
if self.job_os[job_id] == OS.windows and not has_cdb: if TARGETS[job.config.name].os == OS.windows and not has_cdb:
self.logger.warning( self.logger.warning(
"missing cdb in path, not launching repro: %s", "skipping repro for %s, missing cdb", job.config.name
self.target_jobs[job_id],
) )
continue continue
self.logger.info("launching repro: %s", self.target_jobs[job_id]) report = self.get_job_crash_report(job.job_id)
report = self.get_job_crash(job_id)
if report is None: if report is None:
self.logger.warning( self.logger.error(
"target does not include crash reports: %s", "target does not include crash reports: %s", job.config.name
self.target_jobs[job_id],
) )
return result = False
else:
self.logger.info("launching repro: %s", job.config.name)
(container, path) = report (container, path) = report
self.repros[job_id] = self.of.repro.create(container, path, duration=1) repro = self.of.repro.create(container, path, duration=1)
repros[job.job_id] = (job, repro)
def check_repro_impl( return (result, repros)
self,
) -> Tuple[bool, str, bool]: def check_repro(self, repros: Dict[UUID, Tuple[Job, Repro]]) -> bool:
self.logger.info("checking repros")
self.success = True
def check_repro_impl() -> Tuple[bool, str, bool]:
# check all of the launched repros # check all of the launched repros
self.cleared = False self.cleared = False
@ -475,20 +516,20 @@ class TestOnefuzz:
OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"), OS.linux: ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
} }
info: Dict[str, List[str]] = {} for (job, repro) in list(repros.values()):
repros[job.job_id] = (job, self.of.repro.get(repro.vm_id))
done: Set[UUID] = set() for (job, repro) in list(repros.values()):
for job_id, repro in self.repros.items():
repro = self.of.repro.get(repro.vm_id)
if repro.error: if repro.error:
clear() clear()
self.logger.error( self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], repro.error "repro failed: %s: %s",
job.config.name,
repro.error,
) )
self.failed_jobs.add(job_id) self.of.repro.delete(repro.vm_id)
done.add(job_id) del repros[job.job_id]
elif repro.state not in [VmState.init, VmState.extensions_launch]: elif repro.state == VmState.running:
done.add(job_id)
try: try:
result = self.of.repro.connect( result = self.of.repro.connect(
repro.vm_id, repro.vm_id,
@ -499,93 +540,72 @@ class TestOnefuzz:
commands[repro.os][1], result, re.MULTILINE commands[repro.os][1], result, re.MULTILINE
): ):
clear() clear()
self.logger.info( self.logger.info("repro succeeded: %s", job.config.name)
"repro succeeded: %s", self.target_jobs[job_id]
)
self.failed_jobs.add(job_id)
done.add(job_id)
else: else:
clear() clear()
self.logger.error( self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], result "repro failed: %s - %s", job.config.name, result
) )
self.failed_jobs.add(job_id) except Exception as err:
done.add(job_id)
except Exception as e:
clear() clear()
self.logger.error("repro failed: %s - %s", job.config.name, err)
del repros[job.job_id]
elif repro.state not in [VmState.init, VmState.extensions_launch]:
self.logger.error( self.logger.error(
"repro failed: %s: %s", self.target_jobs[job_id], repr(e) "repro failed: %s - bad state: %s", job.config.name, repro.state
) )
self.failed_jobs.add(job_id) del repros[job.job_id]
done.add(job_id)
else:
if repro.state.name not in info:
info[repro.state.name] = []
info[repro.state.name].append(self.target_jobs[job_id])
for job_id in done: repro_states: Dict[str, List[str]] = {}
self.of.repro.delete(self.repros[job_id].vm_id) for (job, repro) in repros.values():
del self.repros[job_id] if repro.state.name not in repro_states:
repro_states[repro.state.name] = []
repro_states[repro.state.name].append(job.config.name)
logline = [] logline = []
for name in info: for state in repro_states:
logline.append("%s:%s" % (name, ",".join(info[name]))) logline.append("%s:%s" % (state, ",".join(repro_states[state])))
msg = "waiting repro: %s" % " ".join(logline) msg = "waiting repro: %s" % " ".join(logline)
if len(logline) > 80: if len(msg) > 80:
msg = "waiting on %d repros" % len(self.repros) msg = "waiting on %d repros" % len(repros)
return (not bool(repros), msg, self.success)
return ( return wait(check_repro_impl)
not bool(self.repros),
msg,
bool(self.failed_jobs),
)
def check_repro(self) -> bool: def get_jobs(self) -> List[Job]:
self.logger.info("checking repros") jobs = self.of.jobs.list(job_state=None)
return wait(self.check_repro_impl) jobs = [x for x in jobs if x.config.project == self.project]
return jobs
def stop_template(self, target: str, delete_containers: bool = True) -> None: def stop_job(self, job: Job, delete_containers: bool = False) -> None:
""" stop a specific template """
if self.skip_cleanup:
self.logger.warning("not cleaning up target: %s", target)
else:
self.of.template.stop( self.of.template.stop(
self.project, job.config.project,
target, job.config.name,
BUILD, BUILD,
delete_containers=delete_containers, delete_containers=delete_containers,
stop_notifications=True,
) )
def cleanup(self, *, user_pools: Optional[Dict[str, str]] = None) -> bool: def get_pools(self) -> List[Pool]:
""" cleanup all of the integration pools & jobs """ pools = self.of.pools.list()
pools = [x for x in pools if x.name == f"testpool-{x.os.name}-{self.test_id}"]
return pools
if self.skip_cleanup: def cleanup(self) -> None:
self.logger.warning("not cleaning up") """ cleanup all of the integration pools & jobs """
return True
self.logger.info("cleaning up") self.logger.info("cleaning up")
errors: List[Exception] = [] errors: List[Exception] = []
for target, config in TARGETS.items(): jobs = self.get_jobs()
if config.os not in self.os: for job in jobs:
continue
if target not in self.targets:
continue
try: try:
self.logger.info("stopping %s", target) self.stop_job(job, delete_containers=True)
self.stop_template(target, delete_containers=False)
except Exception as e: except Exception as e:
self.logger.error("cleanup of %s failed", target) self.logger.error("cleanup of job failed: %s - %s", job, e)
errors.append(e) errors.append(e)
for pool in self.pools.values(): for pool in self.get_pools():
if user_pools and pool.name in user_pools.values():
continue
self.logger.info( self.logger.info(
"halting: %s:%s:%s", pool.name, pool.os.name, pool.arch.name "halting: %s:%s:%s", pool.name, pool.os.name, pool.arch.name
) )
@ -595,52 +615,115 @@ class TestOnefuzz:
self.logger.error("cleanup of pool failed: %s - %s", pool.name, e) self.logger.error("cleanup of pool failed: %s - %s", pool.name, e)
errors.append(e) errors.append(e)
for repro in self.repros.values(): container_names = set()
for job in jobs:
for task in self.of.tasks.list(job_id=job.job_id, state=None):
for container in task.config.containers:
if container.type in [
ContainerType.reports,
ContainerType.unique_reports,
]:
container_names.add(container.name)
for repro in self.of.repro.list():
if repro.config.container in container_names:
try: try:
self.of.repro.delete(repro.vm_id) self.of.repro.delete(repro.vm_id)
except Exception as e: except Exception as e:
self.logger.error("cleanup of repro failed: %s - %s", repro.vm_id, e) self.logger.error("cleanup of repro failed: %s %s", repro.vm_id, e)
errors.append(e) errors.append(e)
return not bool(errors) if errors:
raise Exception("cleanup failed")
class Run(Command): class Run(Command):
def check_jobs(
self,
test_id: UUID,
*,
endpoint: Optional[str],
poll: bool = False,
stop_on_complete_check: bool = False,
) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
result = tester.check_jobs(
poll=poll, stop_on_complete_check=stop_on_complete_check
)
if not result:
raise Exception("jobs failed")
def check_repros(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
launch_result, repros = tester.launch_repro()
result = tester.check_repro(repros)
if not (result and launch_result):
raise Exception("repros failed")
def launch(
self,
samples: Directory,
*,
endpoint: Optional[str] = None,
pool_size: int = 10,
region: Optional[Region] = None,
os_list: List[OS] = [OS.linux, OS.windows],
targets: List[str] = list(TARGETS.keys()),
test_id: Optional[UUID] = None,
duration: int = 1,
) -> UUID:
if test_id is None:
test_id = uuid4()
self.logger.info("launching test_id: %s", test_id)
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id)
tester.setup(region=region, pool_size=pool_size, os_list=os_list)
tester.launch(samples, os_list=os_list, targets=targets, duration=duration)
return test_id
def cleanup(self, test_id: UUID, *, endpoint: Optional[str]) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(self.onefuzz, self.logger, test_id=test_id)
tester.cleanup()
def test( def test(
self, self,
samples: Directory, samples: Directory,
*, *,
endpoint: Optional[str] = None, endpoint: Optional[str] = None,
user_pools: Optional[Dict[str, str]] = None, pool_size: int = 15,
pool_size: int = 10,
region: Optional[Region] = None, region: Optional[Region] = None,
os_list: List[OS] = [OS.linux, OS.windows], os_list: List[OS] = [OS.linux, OS.windows],
targets: List[str] = list(TARGETS.keys()), targets: List[str] = list(TARGETS.keys()),
skip_repro: bool = False, skip_repro: bool = False,
skip_cleanup: bool = False, duration: int = 1,
) -> None: ) -> None:
self.onefuzz.__setup__(endpoint=endpoint)
tester = TestOnefuzz(
self.onefuzz,
self.logger,
pool_size=pool_size,
os_list=os_list,
targets=targets,
skip_cleanup=skip_cleanup,
)
success = True success = True
test_id = uuid4()
error: Optional[Exception] = None error: Optional[Exception] = None
try: try:
tester.setup(region=region, user_pools=user_pools) self.launch(
tester.launch(samples) samples,
tester.check_jobs() endpoint=endpoint,
pool_size=pool_size,
region=region,
os_list=os_list,
targets=targets,
test_id=test_id,
duration=duration,
)
self.check_jobs(
test_id, endpoint=endpoint, poll=True, stop_on_complete_check=True
)
if skip_repro: if skip_repro:
self.logger.warning("not testing crash repro") self.logger.warning("not testing crash repro")
else: else:
self.logger.info("launching crash repro tests") self.check_repros(test_id, endpoint=endpoint)
tester.launch_repro()
tester.check_repro()
except Exception as e: except Exception as e:
self.logger.error("testing failed: %s", repr(e)) self.logger.error("testing failed: %s", repr(e))
error = e error = e
@ -649,10 +732,11 @@ class Run(Command):
self.logger.error("interrupted testing") self.logger.error("interrupted testing")
success = False success = False
if not tester.cleanup(user_pools=user_pools): try:
success = False self.cleanup(test_id, endpoint=endpoint)
except Exception as e:
if tester.failed_jobs or tester.failed_repro: self.logger.error("testing failed: %s", repr(e))
error = e
success = False success = False
if error: if error: