mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-16 20:08:09 +00:00
add missing scaleset nodes (#518)
This commit is contained in:
@ -49,6 +49,7 @@ class Node(BASE_NODE, ORMMixin):
|
|||||||
machine_id: UUID,
|
machine_id: UUID,
|
||||||
scaleset_id: Optional[UUID],
|
scaleset_id: Optional[UUID],
|
||||||
version: str,
|
version: str,
|
||||||
|
new: bool = False,
|
||||||
) -> "Node":
|
) -> "Node":
|
||||||
node = cls(
|
node = cls(
|
||||||
pool_name=pool_name,
|
pool_name=pool_name,
|
||||||
@ -56,7 +57,11 @@ class Node(BASE_NODE, ORMMixin):
|
|||||||
scaleset_id=scaleset_id,
|
scaleset_id=scaleset_id,
|
||||||
version=version,
|
version=version,
|
||||||
)
|
)
|
||||||
node.save()
|
# `save` returns None if it's successfully saved. If `new` is set to
|
||||||
|
# True, `save` returns an Error if an object already exists. As such,
|
||||||
|
# only send an event if `result` is None.
|
||||||
|
result = node.save(new=new)
|
||||||
|
if result is None:
|
||||||
send_event(
|
send_event(
|
||||||
EventNodeCreated(
|
EventNodeCreated(
|
||||||
machine_id=node.machine_id,
|
machine_id=node.machine_id,
|
||||||
|
@ -20,6 +20,7 @@ from onefuzztypes.models import ScalesetNodeState
|
|||||||
from onefuzztypes.primitives import PoolName, Region
|
from onefuzztypes.primitives import PoolName, Region
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from ..__version__ import __version__
|
||||||
from ..azure.auth import build_auth
|
from ..azure.auth import build_auth
|
||||||
from ..azure.image import get_os
|
from ..azure.image import get_os
|
||||||
from ..azure.network import Network
|
from ..azure.network import Network
|
||||||
@ -300,6 +301,35 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
|||||||
)
|
)
|
||||||
node.delete()
|
node.delete()
|
||||||
|
|
||||||
|
# Scalesets can have nodes that never check in (such as broken OS setup
|
||||||
|
# scripts).
|
||||||
|
#
|
||||||
|
# This will add nodes that Azure knows about but have not checked in
|
||||||
|
# so that the `dead node` detection will eventually reimage them.
|
||||||
|
#
|
||||||
|
# NOTE: If node setup takes longer than NODE_EXPIRATION_TIME (1 hour),
|
||||||
|
# this will cause the nodes to continuously get reimaged.
|
||||||
|
node_machine_ids = [x.machine_id for x in nodes]
|
||||||
|
for machine_id in azure_nodes:
|
||||||
|
if machine_id in node_machine_ids:
|
||||||
|
continue
|
||||||
|
|
||||||
|
logging.info(
|
||||||
|
"scaleset - adding missing azure node: %s:%s",
|
||||||
|
self.scaleset_id,
|
||||||
|
machine_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Note: using `new=True` ensures that if a node has already
|
||||||
|
# checked in, this won't overwrite it.
|
||||||
|
Node.create(
|
||||||
|
pool_name=self.pool_name,
|
||||||
|
machine_id=machine_id,
|
||||||
|
scaleset_id=self.scaleset_id,
|
||||||
|
version=__version__,
|
||||||
|
new=True,
|
||||||
|
)
|
||||||
|
|
||||||
existing_nodes = [x for x in nodes if x.machine_id in azure_nodes]
|
existing_nodes = [x for x in nodes if x.machine_id in azure_nodes]
|
||||||
nodes_to_reset = [
|
nodes_to_reset = [
|
||||||
x for x in existing_nodes if x.state in NodeState.ready_for_reset()
|
x for x in existing_nodes if x.state in NodeState.ready_for_reset()
|
||||||
|
Reference in New Issue
Block a user