Fix multi proxy race condition (#909)

Refactored version of PR #904, split out for easier review. Once #908 is reviewed and merged, this change will be simpler to review.
This commit is contained in:
bmc-msft
2021-05-22 02:50:08 -04:00
committed by GitHub
parent 6e5f7e4d4c
commit a103985c0d
3 changed files with 33 additions and 42 deletions

View File

@ -169,6 +169,9 @@ class Proxy(ORMMixin):
self.delete() self.delete()
def is_outdated(self) -> bool: def is_outdated(self) -> bool:
if self.state not in VmState.available():
return True
if self.version != __version__: if self.version != __version__:
logging.info( logging.info(
PROXY_LOG_PREFIX + "mismatch version: proxy:%s service:%s state:%s", PROXY_LOG_PREFIX + "mismatch version: proxy:%s service:%s state:%s",

View File

@ -6,43 +6,13 @@
import logging import logging
import azure.functions as func import azure.functions as func
from onefuzztypes.enums import VmState
from onefuzztypes.events import EventProxyCreated
from ..onefuzzlib.events import get_events, send_event from ..onefuzzlib.events import get_events
from ..onefuzzlib.proxy import Proxy
from ..onefuzzlib.webhooks import WebhookMessageLog from ..onefuzzlib.webhooks import WebhookMessageLog
from ..onefuzzlib.workers.scalesets import Scaleset from ..onefuzzlib.workers.scalesets import Scaleset
def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None: # noqa: F841 def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None: # noqa: F841
proxy_list = Proxy.search()
# Marking Outdated Proxies. Subsequently, shutting down Outdated & Unused Proxies.
for proxy in proxy_list:
if proxy.is_outdated():
logging.info("marking proxy in %s as outdated.", proxy.region)
proxy.outdated = True
proxy.save()
# Creating a new proxy if no proxy exists for a given region.
for proxy in proxy_list:
if proxy.outdated:
region_list = list(
filter(
lambda x: (x.region == proxy.region and not x.outdated),
proxy_list,
)
)
if not len(region_list):
logging.info("outdated proxy in %s, creating new one.", proxy.region)
new_proxy = Proxy(region=proxy.region)
new_proxy.save()
send_event(
EventProxyCreated(region=proxy.region, proxy_id=proxy.proxy_id)
)
if not proxy.is_used():
logging.info("stopping one proxy in %s.", proxy.region)
proxy.set_state(VmState.stopping)
scalesets = Scaleset.search() scalesets = Scaleset.search()
for scaleset in scalesets: for scaleset in scalesets:
logging.info("updating scaleset configs: %s", scaleset.scaleset_id) logging.info("updating scaleset configs: %s", scaleset.scaleset_id)

View File

@ -11,20 +11,27 @@ from onefuzztypes.enums import VmState
from ..onefuzzlib.events import get_events from ..onefuzzlib.events import get_events
from ..onefuzzlib.orm import process_state_updates from ..onefuzzlib.orm import process_state_updates
from ..onefuzzlib.proxy import PROXY_LOG_PREFIX, Proxy from ..onefuzzlib.proxy import PROXY_LOG_PREFIX, Proxy
from ..onefuzzlib.workers.scalesets import Scaleset
def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None: # noqa: F841 def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None: # noqa: F841
# Reminder, proxies are created on-demand. If something is "wrong" with proxies = Proxy.search()
# a proxy, the plan is: delete and recreate it. for proxy in proxies:
for proxy in Proxy.search(): if proxy.state in VmState.available():
if not proxy.is_alive(): # Note, outdated checked at the start, but set at the end of this loop.
logging.error( # As this function is called via a timer, this works around a user
PROXY_LOG_PREFIX + "alive check failed, stopping: %s", proxy.region # requesting to use the proxy while this function is checking if it's
) # out of date
proxy.set_state(VmState.stopping) if proxy.outdated and not proxy.is_used():
proxy.save() proxy.set_state(VmState.stopping)
else: # If something is "wrong" with a proxy, delete & recreate it
proxy.save_proxy_config() elif not proxy.is_alive():
logging.error(
PROXY_LOG_PREFIX + "alive check failed, stopping: %s", proxy.region
)
proxy.set_state(VmState.stopping)
else:
proxy.save_proxy_config()
if proxy.state in VmState.needs_work(): if proxy.state in VmState.needs_work():
logging.info( logging.info(
@ -34,6 +41,17 @@ def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None: # noqa:
) )
process_state_updates(proxy) process_state_updates(proxy)
if proxy.is_outdated():
proxy.outdated = True
proxy.save()
# make sure there is a proxy for every currently active region
scalesets = Scaleset.search()
regions = set(x.region for x in scalesets)
for region in regions:
if all(x.outdated for x in proxies if x.region == region):
Proxy.get_or_create(region)
events = get_events() events = get_events()
if events: if events:
dashboard.set(events) dashboard.set(events)