mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-16 03:48:09 +00:00
add missing scaleset nodes (#518)
This commit is contained in:
@ -49,6 +49,7 @@ class Node(BASE_NODE, ORMMixin):
|
||||
machine_id: UUID,
|
||||
scaleset_id: Optional[UUID],
|
||||
version: str,
|
||||
new: bool = False,
|
||||
) -> "Node":
|
||||
node = cls(
|
||||
pool_name=pool_name,
|
||||
@ -56,14 +57,18 @@ class Node(BASE_NODE, ORMMixin):
|
||||
scaleset_id=scaleset_id,
|
||||
version=version,
|
||||
)
|
||||
node.save()
|
||||
send_event(
|
||||
EventNodeCreated(
|
||||
machine_id=node.machine_id,
|
||||
scaleset_id=node.scaleset_id,
|
||||
pool_name=node.pool_name,
|
||||
# `save` returns None if it's successfully saved. If `new` is set to
|
||||
# True, `save` returns an Error if an object already exists. As such,
|
||||
# only send an event if result is None
|
||||
result = node.save(new=new)
|
||||
if result is None:
|
||||
send_event(
|
||||
EventNodeCreated(
|
||||
machine_id=node.machine_id,
|
||||
scaleset_id=node.scaleset_id,
|
||||
pool_name=node.pool_name,
|
||||
)
|
||||
)
|
||||
)
|
||||
return node
|
||||
|
||||
@classmethod
|
||||
|
@ -20,6 +20,7 @@ from onefuzztypes.models import ScalesetNodeState
|
||||
from onefuzztypes.primitives import PoolName, Region
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from ..__version__ import __version__
|
||||
from ..azure.auth import build_auth
|
||||
from ..azure.image import get_os
|
||||
from ..azure.network import Network
|
||||
@ -300,6 +301,35 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
||||
)
|
||||
node.delete()
|
||||
|
||||
# Scalesets can have nodes that never check in (e.g. because of broken OS setup
|
||||
# scripts).
|
||||
#
|
||||
# This adds nodes that Azure knows about but that have not checked in,
|
||||
# so that the `dead node` detection will eventually reimage them.
|
||||
#
|
||||
# NOTE: If node setup takes longer than NODE_EXPIRATION_TIME (1 hour),
|
||||
# this will cause the nodes to continuously get reimaged.
|
||||
node_machine_ids = [x.machine_id for x in nodes]
|
||||
for machine_id in azure_nodes:
|
||||
if machine_id in node_machine_ids:
|
||||
continue
|
||||
|
||||
logging.info(
|
||||
"scaleset - adding missing azure node: %s:%s",
|
||||
self.scaleset_id,
|
||||
machine_id,
|
||||
)
|
||||
|
||||
# NOTE: using `new=True` ensures that if a node has already
|
||||
# checked in, this won't overwrite it.
|
||||
Node.create(
|
||||
pool_name=self.pool_name,
|
||||
machine_id=machine_id,
|
||||
scaleset_id=self.scaleset_id,
|
||||
version=__version__,
|
||||
new=True,
|
||||
)
|
||||
|
||||
existing_nodes = [x for x in nodes if x.machine_id in azure_nodes]
|
||||
nodes_to_reset = [
|
||||
x for x in existing_nodes if x.state in NodeState.ready_for_reset()
|
||||
|
Reference in New Issue
Block a user