mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-11 09:41:37 +00:00
Reimage dead nodes (#154)
This commit is contained in:
parent
e5f723e9c9
commit
041c6ae130
@ -62,6 +62,8 @@ from .azure.vmss import (
|
||||
from .extension import fuzz_extensions
|
||||
from .orm import MappingIntStrAny, ORMMixin, QueryFilter
|
||||
|
||||
# Heartbeat age threshold passed to Node.get_dead_nodes: nodes whose last
# heartbeat is older than this are halted and queued for reimaging.
NODE_EXPIRATION_TIME: datetime.timedelta = datetime.timedelta(hours=1)
|
||||
|
||||
# Future work:
|
||||
#
|
||||
# Enabling autoscaling for the scalesets based on the pool work queues.
|
||||
@ -278,6 +280,18 @@ class Node(BASE_NODE, ORMMixin):
|
||||
self.set_shutdown()
|
||||
self.stop()
|
||||
|
||||
@classmethod
def get_dead_nodes(
    cls, scaleset_id: UUID, expiration_period: datetime.timedelta
) -> List["Node"]:
    """Search for nodes in a scaleset whose heartbeat is too old.

    Args:
        scaleset_id: Scaleset whose nodes are searched.
        expiration_period: Maximum allowed heartbeat age; the cutoff is the
            current UTC time minus this period.

    Returns:
        The result of ``cls.search`` restricted to ``scaleset_id`` and to
        entries whose ``heartbeat`` is strictly older than the cutoff.
    """
    # Naive UTC timestamp; its ISO-8601 form is embedded verbatim in the
    # raw filter expression below.
    cutoff = datetime.datetime.utcnow() - expiration_period
    heartbeat_filter = "heartbeat lt datetime'%s'" % cutoff.isoformat()

    # NOTE(review): the filter is passed through unchecked; it is built only
    # from a locally computed timestamp, never from caller-supplied text.
    return cls.search(
        query={"scaleset_id": [scaleset_id]},
        raw_unchecked_filter=heartbeat_filter,
    )
|
||||
|
||||
|
||||
class NodeTasks(BASE_NODE_TASK, ORMMixin):
|
||||
@classmethod
|
||||
@ -743,6 +757,11 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
||||
# only add nodes that are not already set to reschedule
|
||||
to_reimage.append(node)
|
||||
|
||||
dead_nodes = Node.get_dead_nodes(self.scaleset_id, NODE_EXPIRATION_TIME)
|
||||
for node in dead_nodes:
|
||||
node.set_halt()
|
||||
to_reimage.append(node)
|
||||
|
||||
# Perform operations until they fail due to scaleset getting locked
|
||||
try:
|
||||
if to_delete:
|
||||
|
Loading…
x
Reference in New Issue
Block a user