Retry on "No connection could be made because the target machine actively refused it" (#2252)

* Retry on "No connection could be made because the target machine actively refused it" when connecting the debugger using the create_and_connect command

* limit number of retries

Co-authored-by: stas <statis@microsoft.com>
This commit is contained in:
Stas
2022-08-15 09:24:26 -07:00
committed by GitHub
parent ef0367cb7a
commit 15d92cad52
2 changed files with 57 additions and 23 deletions

View File

@ -9,6 +9,7 @@ import os
import pkgutil import pkgutil
import re import re
import subprocess # nosec import subprocess # nosec
import time
import uuid import uuid
from enum import Enum from enum import Enum
from shutil import which from shutil import which
@ -553,7 +554,9 @@ class Repro(Endpoint):
return None return None
def _dbg_windows( def _dbg_windows(
self, repro: models.Repro, debug_command: Optional[str] self,
repro: models.Repro,
debug_command: Optional[str],
) -> Optional[str]: ) -> Optional[str]:
"""Setup an SSH tunnel, then connect via CDB over SSH tunnel""" """Setup an SSH tunnel, then connect via CDB over SSH tunnel"""
@ -564,33 +567,54 @@ class Repro(Endpoint):
): ):
raise Exception("vm setup failed: %s" % repro.state) raise Exception("vm setup failed: %s" % repro.state)
NUM_RETRIES = 10
bind_all = which("wslpath") is not None and repro.os == enums.OS.windows bind_all = which("wslpath") is not None and repro.os == enums.OS.windows
proxy = "*:" + REPRO_SSH_FORWARD if bind_all else REPRO_SSH_FORWARD proxy = "*:" + REPRO_SSH_FORWARD if bind_all else REPRO_SSH_FORWARD
with ssh_connect(repro.ip, repro.auth.private_key, proxy=proxy): with ssh_connect(repro.ip, repro.auth.private_key, proxy=proxy):
dbg = ["cdb.exe", "-remote", "tcp:port=1337,server=localhost"] dbg = ["cdb.exe", "-remote", "tcp:port=1337,server=localhost"]
if debug_command: while NUM_RETRIES > 0:
dbg_script = [debug_command, "qq"] NUM_RETRIES = NUM_RETRIES - 1
with temp_file("db.script", "\r\n".join(dbg_script)) as dbg_script_path: if debug_command:
dbg += ["-cf", _wsl_path(dbg_script_path)] dbg_script = [debug_command, "qq"]
with temp_file(
"db.script", "\r\n".join(dbg_script)
) as dbg_script_path:
dbg += ["-cf", _wsl_path(dbg_script_path)]
logging.debug("launching: %s", dbg)
try:
# security note: dbg is built from content coming from the server,
# which is trusted in this context.
return subprocess.run( # nosec
dbg, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
).stdout.decode(errors="ignore")
except subprocess.CalledProcessError as err:
if err.returncode == 0x8007274D:
self.logger.info(
"failed to connect to debug-server trying again in 10 seconds..."
)
time.sleep(10.0)
else:
self.logger.error(
"debug failed: %s",
err.output.decode(errors="ignore"),
)
raise err
else:
logging.debug("launching: %s", dbg) logging.debug("launching: %s", dbg)
# security note: dbg is built from content coming from the
# server, which is trusted in this context.
try: try:
# security note: dbg is built from content coming from the server, subprocess.check_call(dbg) # nosec
# which is trusted in this context. break
return subprocess.run( # nosec
dbg, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
).stdout.decode(errors="ignore")
except subprocess.CalledProcessError as err: except subprocess.CalledProcessError as err:
self.logger.error( if err.returncode == 0x8007274D:
"debug failed: %s", err.output.decode(errors="ignore") self.logger.info(
) "failed to connect to debug-server trying again in 10 seconds..."
raise err )
else: time.sleep(10.0)
logging.debug("launching: %s", dbg) else:
# security note: dbg is built from content coming from the break
# server, which is trusted in this context.
subprocess.call(dbg) # nosec
return None return None
def connect( def connect(
@ -635,9 +659,9 @@ class Repro(Endpoint):
) )
repro = wait(func) repro = wait(func)
# give time for debug server to initialize
time.sleep(30.0)
result: Optional[str] = None result: Optional[str] = None
if repro.os == enums.OS.windows: if repro.os == enums.OS.windows:
result = self._dbg_windows(repro, debug_command) result = self._dbg_windows(repro, debug_command)
elif repro.os == enums.OS.linux: elif repro.os == enums.OS.linux:

View File

@ -60,6 +60,7 @@ def build_ssh_command(
proxy: Optional[str] = None, proxy: Optional[str] = None,
port: Optional[int] = None, port: Optional[int] = None,
command: Optional[str] = None, command: Optional[str] = None,
num_auth_retries: Optional[int] = None,
) -> Generator: ) -> Generator:
with temp_file("id_rsa", private_key, set_owner_only=True) as ssh_key: with temp_file("id_rsa", private_key, set_owner_only=True) as ssh_key:
cmd = [ cmd = [
@ -82,6 +83,9 @@ def build_ssh_command(
if log_level <= logging.DEBUG: if log_level <= logging.DEBUG:
cmd += ["-v"] cmd += ["-v"]
if num_auth_retries:
cmd += ["authentication-retries", str(num_auth_retries)]
if command: if command:
cmd += [command] cmd += [command]
@ -97,9 +101,15 @@ def ssh_connect(
call: bool = False, call: bool = False,
port: Optional[int] = None, port: Optional[int] = None,
command: Optional[str] = None, command: Optional[str] = None,
num_auth_retries: Optional[int] = None,
) -> Generator: ) -> Generator:
with build_ssh_command( with build_ssh_command(
ip, private_key, proxy=proxy, port=port, command=command ip,
private_key,
proxy=proxy,
port=port,
command=command,
num_auth_retries=num_auth_retries,
) as cmd: ) as cmd:
logging.info("launching ssh: %s", " ".join(cmd)) logging.info("launching ssh: %s", " ".join(cmd))