Disable repro and debug VM CLI commands. (#3494)

* Disable repro and debug VM CLI commands.

* Formatting.

* More formatting.

* More formatting.

* Removing Repro check.
This commit is contained in:
Noah McGregor Harper 2023-09-27 13:53:29 -07:00 committed by Cheick Keita
parent ecb2d322d8
commit 7bcc41c67b
3 changed files with 9 additions and 519 deletions

View File

@ -9,7 +9,6 @@ import os
import pkgutil
import re
import subprocess # nosec
import time
import uuid
from enum import Enum
from shutil import which
@ -35,8 +34,7 @@ from six.moves import input # workaround for static analysis
from .__version__ import __version__
from .azcopy import azcopy_sync
from .backend import Backend, BackendConfig, ContainerWrapper, wait
from .ssh import build_ssh_command, ssh_connect, temp_file
from .backend import Backend, BackendConfig, ContainerWrapper
UUID_EXPANSION = TypeVar("UUID_EXPANSION", UUID, str)
@ -530,316 +528,6 @@ class Containers(Endpoint):
azcopy_sync(to_download[name], outdir)
class Repro(Endpoint):
    """Interact with Reproduction VMs"""

    # Server-side route this Endpoint talks to.
    endpoint = "repro_vms"

    def get(self, vm_id: UUID_EXPANSION) -> models.Repro:
        """get information about a Reproduction VM"""
        # Accept an abbreviated UUID and expand it against the current VM list.
        vm_id_expanded = self._disambiguate_uuid(
            "vm_id", vm_id, lambda: [str(x.vm_id) for x in self.list()]
        )

        self.logger.debug("get repro vm: %s", vm_id_expanded)
        return self._req_model(
            "GET", models.Repro, data=requests.ReproGet(vm_id=vm_id_expanded)
        )

    def get_files(
        self,
        report_container: primitives.Container,
        report_name: str,
        include_setup: bool = False,
        output_dir: primitives.Directory = primitives.Directory("."),
    ) -> None:
        """downloads the files necessary to locally repro the crash from a given report"""
        report_bytes = self.onefuzz.containers.files.get(report_container, report_name)
        report = json.loads(report_bytes)

        # Normalized crash location; filled in from whichever report
        # format we find below.
        crash_info = {
            "input_blob_container": primitives.Container(""),
            "input_blob_name": "",
            "job_id": "",
        }
        if "input_blob" in report:
            # Plain crash-report format.
            crash_info["input_blob_container"] = report["input_blob"]["container"]
            crash_info["input_blob_name"] = report["input_blob"]["name"]
            crash_info["job_id"] = report["job_id"]
        elif "crash_test_result" in report and "original_crash_test_result" in report:
            # Regression-report format: prefer the new crash test result,
            # falling back to the original one for the job id.
            if report["original_crash_test_result"]["crash_report"] is None:
                self.logger.error(
                    "No crash report found in the original crash test result, repro files cannot be retrieved"
                )
                return
            elif report["crash_test_result"]["crash_report"] is None:
                self.logger.info(
                    "No crash report found in the new crash test result, falling back on the original crash test result for job_id"
                    "Note: if using --include_setup, the downloaded fuzzer binaries may be out-of-date"
                )

            original_report = report["original_crash_test_result"]["crash_report"]
            new_report = (
                report["crash_test_result"]["crash_report"] or original_report
            )  # fallback on original_report

            # The input blob always comes from the original report; only the
            # job id may come from the newer one.
            crash_info["input_blob_container"] = original_report["input_blob"][
                "container"
            ]
            crash_info["input_blob_name"] = original_report["input_blob"]["name"]
            crash_info["job_id"] = new_report["job_id"]
        else:
            self.logger.error(
                "Encountered an unhandled report format, repro files cannot be retrieved"
            )
            return

        self.logger.info(
            "downloading files necessary to locally repro crash %s",
            crash_info["input_blob_name"],
        )
        self.onefuzz.containers.files.download(
            primitives.Container(crash_info["input_blob_container"]),
            crash_info["input_blob_name"],
            os.path.join(output_dir, crash_info["input_blob_name"]),
        )

        if include_setup:
            # NOTE(review): assumes the job has at least one setup container;
            # an empty list here would raise IndexError — confirm upstream.
            setup_container = list(
                self.onefuzz.jobs.containers.list(
                    crash_info["job_id"], enums.ContainerType.setup
                )
            )[0]

            self.onefuzz.containers.files.download_dir(
                primitives.Container(setup_container), output_dir
            )

    def create(
        self, container: primitives.Container, path: str, duration: int = 24
    ) -> models.Repro:
        """Create a Reproduction VM from a Crash Report"""
        self.logger.info(
            "creating repro vm: %s %s (%d hours)", container, path, duration
        )
        return self._req_model(
            "POST",
            models.Repro,
            data=models.ReproConfig(container=container, path=path, duration=duration),
        )

    def delete(self, vm_id: UUID_EXPANSION) -> models.Repro:
        """Delete a Reproduction VM"""
        # Same abbreviated-UUID expansion as `get`.
        vm_id_expanded = self._disambiguate_uuid(
            "vm_id", vm_id, lambda: [str(x.vm_id) for x in self.list()]
        )

        self.logger.debug("deleting repro vm: %s", vm_id_expanded)
        return self._req_model(
            "DELETE", models.Repro, data=requests.ReproGet(vm_id=vm_id_expanded)
        )

    def list(self) -> List[models.Repro]:
        """List all VMs"""
        self.logger.debug("listing repro vms")
        return self._req_model_list("GET", models.Repro, data=requests.ReproGet())

    def _dbg_linux(
        self, repro: models.Repro, debug_command: Optional[str]
    ) -> Optional[str]:
        """Launch gdb with GDB script that includes 'target remote | ssh ...'"""
        # The VM must be fully provisioned (auth + ip + running) before we
        # can attach a debugger.
        if (
            repro.auth is None
            or repro.ip is None
            or repro.state != enums.VmState.running
        ):
            raise Exception("vm setup failed: %s" % repro.state)

        with build_ssh_command(
            repro.ip, repro.auth.private_key, command="-T"
        ) as ssh_cmd:
            # gdb's "target remote |" spawns the ssh pipe as the remote stub.
            gdb_script = [
                "target remote | %s sudo /onefuzz/bin/repro-stdout.sh"
                % " ".join(ssh_cmd)
            ]

            if debug_command:
                # Non-interactive: run the one command, then exit gdb.
                gdb_script += [debug_command, "quit"]

            with temp_file("gdb.script", "\n".join(gdb_script)) as gdb_script_path:
                dbg = ["gdb", "--silent", "--command", gdb_script_path]

                if debug_command:
                    dbg += ["--batch"]

                    try:
                        # security note: dbg is built from content coming from
                        # the server, which is trusted in this context.
                        return subprocess.run(  # nosec
                            dbg, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
                        ).stdout.decode(errors="ignore")
                    except subprocess.CalledProcessError as err:
                        self.logger.error(
                            "debug failed: %s", err.output.decode(errors="ignore")
                        )
                        raise err
                else:
                    # security note: dbg is built from content coming from the
                    # server, which is trusted in this context.
                    subprocess.call(dbg)  # nosec
                    return None

    def _dbg_windows(
        self,
        repro: models.Repro,
        debug_command: Optional[str],
        retry_limit: Optional[int],
    ) -> Optional[str]:
        """Setup an SSH tunnel, then connect via CDB over SSH tunnel"""
        if (
            repro.auth is None
            or repro.ip is None
            or repro.state != enums.VmState.running
        ):
            raise Exception("vm setup failed: %s" % repro.state)

        retry_count = 0
        # Under WSL, cdb.exe runs on the Windows side, so the tunnel must
        # bind all interfaces rather than loopback only.
        bind_all = which("wslpath") is not None and repro.os == enums.OS.windows
        proxy = "*:" + REPRO_SSH_FORWARD if bind_all else REPRO_SSH_FORWARD
        while retry_limit is None or retry_count <= retry_limit:
            if retry_limit:
                retry_count = retry_count + 1
            with ssh_connect(repro.ip, repro.auth.private_key, proxy=proxy):
                dbg = ["cdb.exe", "-remote", "tcp:port=1337,server=localhost"]
                if debug_command:
                    # "qq" quits the cdb client after the user command runs.
                    dbg_script = [debug_command, "qq"]
                    with temp_file(
                        "db.script", "\r\n".join(dbg_script)
                    ) as dbg_script_path:
                        dbg += ["-cf", _wsl_path(dbg_script_path)]

                        logging.debug("launching: %s", dbg)
                        try:
                            # security note: dbg is built from content coming from the server,
                            # which is trusted in this context.
                            return subprocess.run(  # nosec
                                dbg, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
                            ).stdout.decode(errors="ignore")
                        except subprocess.CalledProcessError as err:
                            # 0x8007274D — presumably HRESULT for "connection
                            # refused" (debug server not up yet); retry.
                            if err.returncode == 0x8007274D:
                                self.logger.info(
                                    "failed to connect to debug-server trying again in 10 seconds..."
                                )
                                time.sleep(10.0)
                            else:
                                self.logger.error(
                                    "debug failed: %s",
                                    err.output.decode(errors="ignore"),
                                )
                                raise err
                else:
                    logging.debug("launching: %s", dbg)
                    # security note: dbg is built from content coming from the
                    # server, which is trusted in this context.
                    try:
                        subprocess.check_call(dbg)  # nosec
                        return None
                    except subprocess.CalledProcessError as err:
                        if err.returncode == 0x8007274D:
                            self.logger.info(
                                "failed to connect to debug-server trying again in 10 seconds..."
                            )
                            time.sleep(10.0)
                        else:
                            return None

        if retry_limit is not None:
            self.logger.info(
                f"failed to connect to debug-server after {retry_limit} attempts. Please try again later "
                + f"with onefuzz debug connect {repro.vm_id}"
            )
        return None

    def connect(
        self,
        vm_id: UUID_EXPANSION,
        delete_after_use: bool = False,
        debug_command: Optional[str] = None,
        retry_limit: Optional[int] = None,
    ) -> Optional[str]:
        """Connect to an existing Reproduction VM"""
        self.logger.info("connecting to reproduction VM: %s", vm_id)

        if which("ssh") is None:
            raise Exception("unable to find ssh on local machine")

        # Wait until the server has determined the VM's OS, since the
        # debugger choice (cdb vs gdb) depends on it.
        def missing_os() -> Tuple[bool, str, models.Repro]:
            repro = self.get(vm_id)
            return (
                repro.os is not None,
                "waiting for os determination",
                repro,
            )

        repro = wait(missing_os)
        if repro.os == enums.OS.windows:
            if which("cdb.exe") is None:
                raise Exception("unable to find cdb.exe on local machine")
        if repro.os == enums.OS.linux:
            if which("gdb") is None:
                raise Exception("unable to find gdb on local machine")

        # Wait until the VM has credentials, an IP, and is past the
        # provisioning states.
        def func() -> Tuple[bool, str, models.Repro]:
            repro = self.get(vm_id)
            state = repro.state
            return (
                repro.auth is not None
                and repro.ip is not None
                and state not in [enums.VmState.init, enums.VmState.extensions_launch],
                "launching reproducing vm. current state: %s" % state,
                repro,
            )

        repro = wait(func)

        # give time for debug server to initialize
        time.sleep(30.0)

        result: Optional[str] = None
        if repro.os == enums.OS.windows:
            result = self._dbg_windows(repro, debug_command, retry_limit)
        elif repro.os == enums.OS.linux:
            result = self._dbg_linux(repro, debug_command)
        else:
            raise NotImplementedError

        if delete_after_use:
            self.logger.debug("deleting vm %s", repro.vm_id)
            self.delete(repro.vm_id)

        return result

    def create_and_connect(
        self,
        container: primitives.Container,
        path: str,
        duration: int = 24,
        delete_after_use: bool = False,
        debug_command: Optional[str] = None,
        retry_limit: Optional[int] = None,
    ) -> Optional[str]:
        """Create and connect to a Reproduction VM"""
        repro = self.create(container, path, duration=duration)
        return self.connect(
            repro.vm_id,
            delete_after_use=delete_after_use,
            debug_command=debug_command,
            retry_limit=retry_limit,
        )
class Notifications(Endpoint):
"""Interact with models.Notifications"""
@ -1900,7 +1588,6 @@ class Onefuzz:
client_secret=client_secret,
)
self.containers = Containers(self)
self.repro = Repro(self)
self.notifications = Notifications(self)
self.tasks = Tasks(self)
self.jobs = Jobs(self)

View File

@ -28,9 +28,6 @@ from onefuzztypes.responses import TemplateValidationResponse
from onefuzz.api import UUID_EXPANSION, Command, Endpoint, Onefuzz
from .azure_identity_credential_adapter import AzureIdentityCredentialAdapter
from .backend import wait
from .rdp import rdp_connect
from .ssh import ssh_connect
EMPTY_SHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
ZERO_SHA256 = "0" * len(EMPTY_SHA256)
@ -39,143 +36,6 @@ HOUR_TIMESPAN = "PT1H"
DEFAULT_TAIL_DELAY = 10.0
class DebugRepro(Command):
    """Debug repro instances"""

    def _disambiguate(self, vm_id: UUID_EXPANSION) -> str:
        # Expand a possibly-abbreviated VM id against the full repro VM list.
        expanded = self.onefuzz.repro._disambiguate_uuid(
            "vm_id",
            vm_id,
            lambda: [str(x.vm_id) for x in self.onefuzz.repro.list()],
        )
        return str(expanded)

    def _info(self) -> Tuple[str, str]:
        # (resource group, subscription) of the current instance.
        info = self.onefuzz.info.get()
        return (info.resource_group, info.subscription)

    def ssh(self, vm_id: str) -> None:
        # Resolve the VM and require both an address and credentials
        # before attempting the connection.
        target = self.onefuzz.repro.get(self._disambiguate(vm_id))
        if target.ip is None:
            raise Exception("missing IP: %s" % target)
        if target.auth is None:
            raise Exception("missing Auth: %s" % target)

        # call=True launches an interactive session.
        with ssh_connect(target.ip, target.auth.private_key, call=True):
            pass

    def rdp(self, vm_id: str) -> None:
        target = self.onefuzz.repro.get(self._disambiguate(vm_id))
        if target.ip is None:
            raise Exception("missing IP: %s" % target)
        if target.auth is None:
            raise Exception("missing Auth: %s" % target)

        RDP_PORT = 3389
        with rdp_connect(target.ip, target.auth.password, port=RDP_PORT):
            return
class DebugNode(Command):
    """Debug a specific node on a scaleset"""

    def rdp(self, machine_id: UUID_EXPANSION, duration: Optional[int] = 1) -> None:
        # Look up the node, then delegate to the scaleset-level RDP plumbing.
        info = self.onefuzz.nodes.get(machine_id)
        if info.scaleset_id is None:
            raise Exception("node is not part of a scaleset")
        self.onefuzz.debug.scalesets.rdp(
            scaleset_id=info.scaleset_id,
            machine_id=info.machine_id,
            duration=duration,
        )

    def ssh(self, machine_id: UUID_EXPANSION, duration: Optional[int] = 1) -> None:
        # Look up the node, then delegate to the scaleset-level SSH plumbing.
        info = self.onefuzz.nodes.get(machine_id)
        if info.scaleset_id is None:
            raise Exception("node is not part of a scaleset")
        self.onefuzz.debug.scalesets.ssh(
            scaleset_id=info.scaleset_id,
            machine_id=info.machine_id,
            duration=duration,
        )
class DebugScaleset(Command):
    """Debug tasks"""

    def _get_proxy_setup(
        self, scaleset_id: str, machine_id: UUID, port: int, duration: Optional[int]
    ) -> Tuple[bool, str, Optional[Tuple[str, int]]]:
        # Poll-style helper for wait(): (done, status message, payload).
        forward = self.onefuzz.scaleset_proxy.create(
            scaleset_id, machine_id, port, duration=duration
        )
        if forward.ip is not None:
            return (True, "waiting on proxy port", (forward.ip, forward.forward.src_port))
        return (False, "waiting on proxy ip", None)

    def rdp(
        self,
        scaleset_id: str,
        machine_id: UUID_EXPANSION,
        duration: Optional[int] = 1,
    ) -> None:
        # Resolve the scaleset and node, including credentials.
        scaleset, node_id = self.onefuzz.scalesets._expand_scaleset_machine(
            scaleset_id, machine_id, include_auth=True
        )

        RDP_PORT = 3389
        setup = wait(
            lambda: self._get_proxy_setup(
                scaleset.scaleset_id, node_id, RDP_PORT, duration
            )
        )
        if setup is None:
            raise Exception("no proxy for RDP port configured")
        if scaleset.auth is None:
            raise Exception("auth is not available for scaleset")

        ip, port = setup
        with rdp_connect(ip, scaleset.auth.password, port=port):
            return

    def ssh(
        self,
        scaleset_id: str,
        machine_id: UUID_EXPANSION,
        duration: Optional[int] = 1,
        command: Optional[str] = None,
    ) -> None:
        # Resolve the scaleset and node, including credentials.
        scaleset, node_id = self.onefuzz.scalesets._expand_scaleset_machine(
            scaleset_id, machine_id, include_auth=True
        )

        SSH_PORT = 22
        setup = wait(
            lambda: self._get_proxy_setup(
                scaleset.scaleset_id, node_id, SSH_PORT, duration
            )
        )
        if setup is None:
            raise Exception("no proxy for SSH port configured")

        ip, port = setup
        if scaleset.auth is None:
            raise Exception("auth is not available for scaleset")

        with ssh_connect(
            ip, scaleset.auth.private_key, port=port, call=True, command=command
        ):
            return
class DebugTask(Command):
"""Debug a specific task"""
@ -202,26 +62,6 @@ class DebugTask(Command):
raise Exception("unable to find scaleset node running on task")
def ssh(
    self,
    task_id: UUID_EXPANSION,
    *,
    node_id: Optional[UUID] = None,
    duration: Optional[int] = 1,
) -> None:
    # Map the task to a concrete (scaleset, node) pair, then reuse the
    # scaleset-level SSH plumbing.
    resolved_scaleset, resolved_node = self._get_node(task_id, node_id)
    return self.onefuzz.debug.scalesets.ssh(
        resolved_scaleset, resolved_node, duration=duration
    )
def rdp(
    self,
    task_id: UUID_EXPANSION,
    *,
    node_id: Optional[UUID] = None,
    duration: Optional[int] = 1,
) -> None:
    # Map the task to a concrete (scaleset, node) pair, then reuse the
    # scaleset-level RDP plumbing.
    resolved_scaleset, resolved_node = self._get_node(task_id, node_id)
    return self.onefuzz.debug.scalesets.rdp(
        resolved_scaleset, resolved_node, duration=duration
    )
def libfuzzer_coverage(
self,
task_id: UUID_EXPANSION,
@ -276,37 +116,12 @@ class DebugJobTask(Command):
"unable to find task type %s for job:%s" % (task_type.name, job_id)
)
def ssh(
    self,
    job_id: UUID_EXPANSION,
    task_type: TaskType,
    *,
    duration: Optional[int] = 1,
) -> None:
    """SSH into the first node running the specified task type in the job"""
    # Resolve (job, task type) to a task id, then delegate.
    target_task = self._get_task(job_id, task_type)
    return self.onefuzz.debug.task.ssh(target_task, duration=duration)
def rdp(
    self,
    job_id: UUID_EXPANSION,
    task_type: TaskType,
    *,
    duration: Optional[int] = 1,
) -> None:
    """RDP into the first node running the specified task type in the job"""
    # Resolve (job, task type) to a task id, then delegate.
    target_task = self._get_task(job_id, task_type)
    return self.onefuzz.debug.task.rdp(target_task, duration=duration)
class DebugJob(Command):
"""Debug a specific Job"""
def __init__(self, onefuzz: Any, logger: logging.Logger):
    # Wire up the per-job-task debug sub-command alongside the base Command.
    super().__init__(onefuzz, logger)
    self.task = DebugJobTask(onefuzz, logger)
def libfuzzer_coverage(
self,
@ -883,10 +698,7 @@ class Debug(Command):
def __init__(self, onefuzz: Any, logger: logging.Logger):
    # Aggregate every debug sub-command under `onefuzz debug ...`.
    super().__init__(onefuzz, logger)
    self.scalesets = DebugScaleset(onefuzz, logger)
    self.repro = DebugRepro(onefuzz, logger)
    self.job = DebugJob(onefuzz, logger)
    self.notification = DebugNotification(onefuzz, logger)
    self.task = DebugTask(onefuzz, logger)
    self.logs = DebugLog(onefuzz, logger)
    self.node = DebugNode(onefuzz, logger)

View File

@ -244,7 +244,7 @@ TARGETS: Dict[str, Integration] = {
"--test:{extra_setup_dir}",
"--write_test_file={extra_output_dir}/test.txt",
],
pool=PoolName("mariner")
pool=PoolName("mariner"),
),
"windows-libfuzzer": Integration(
template=TemplateType.libfuzzer,
@ -401,10 +401,13 @@ class TestOnefuzz:
self.of.pools.create(name, OS.linux)
self.logger.info("creating scaleset for pool: %s", name)
self.of.scalesets.create(
name, pool_size, region=region, initial_size=pool_size, image="MicrosoftCBLMariner:cbl-mariner:cbl-mariner-2-gen2:latest"
name,
pool_size,
region=region,
initial_size=pool_size,
image="MicrosoftCBLMariner:cbl-mariner:cbl-mariner-2-gen2:latest",
)
class UnmanagedPool:
def __init__(
self,
@ -644,7 +647,7 @@ class TestOnefuzz:
setup = Directory(os.path.join(setup, config.nested_setup_dir))
job: Optional[Job] = None
job = self.build_job(
duration, pool, target, config, setup, target_exe, inputs
)
@ -1277,7 +1280,7 @@ class TestOnefuzz:
if seen_errors:
raise Exception("logs included errors")
def build_pool_name(self, os_type: str) -> PoolName:
    # Per-test pool name: testpool-<os>-<test id>.
    name = f"testpool-{os_type}-{self.test_id}"
    return PoolName(name)
@ -1462,18 +1465,6 @@ class Run(Command):
job_ids=job_ids,
)
if skip_repro:
self.logger.warning("not testing crash repro")
else:
self.check_repros(
test_id,
endpoint=endpoint,
authority=authority,
client_id=client_id,
client_secret=client_secret,
job_ids=job_ids,
)
def test_unmanaged(
self,
samples: Directory,