mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-12 18:18:08 +00:00
Reimage dead nodes (#154)
This commit is contained in:
@ -62,6 +62,8 @@ from .azure.vmss import (
|
|||||||
from .extension import fuzz_extensions
|
from .extension import fuzz_extensions
|
||||||
from .orm import MappingIntStrAny, ORMMixin, QueryFilter
|
from .orm import MappingIntStrAny, ORMMixin, QueryFilter
|
||||||
|
|
||||||
|
# Maximum age of a node's last heartbeat before the node is treated as dead.
# Passed as `expiration_period` to Node.get_dead_nodes, which selects nodes
# whose `heartbeat` is earlier than (utcnow - this value).
NODE_EXPIRATION_TIME: datetime.timedelta = datetime.timedelta(hours=1)
|
||||||
|
|
||||||
# Future work:
|
# Future work:
|
||||||
#
|
#
|
||||||
# Enabling autoscaling for the scalesets based on the pool work queues.
|
# Enabling autoscaling for the scalesets based on the pool work queues.
|
||||||
@ -278,6 +280,18 @@ class Node(BASE_NODE, ORMMixin):
|
|||||||
self.set_shutdown()
|
self.set_shutdown()
|
||||||
self.stop()
|
self.stop()
|
||||||
|
|
||||||
|
@classmethod
def get_dead_nodes(
    cls, scaleset_id: UUID, expiration_period: datetime.timedelta
) -> List["Node"]:
    """Find nodes in a scaleset whose heartbeat is older than the cutoff.

    Args:
        scaleset_id: Scaleset whose nodes are searched.
        expiration_period: How far back from the current UTC time the
            cutoff is placed; nodes with a `heartbeat` earlier than that
            cutoff are selected.

    Returns:
        Whatever `cls.search` yields for the scaleset query combined with
        the heartbeat filter.
    """
    # Naive UTC timestamp; its ISO-8601 text is embedded in the raw filter.
    cutoff = datetime.datetime.utcnow() - expiration_period
    heartbeat_filter = "heartbeat lt datetime'%s'" % cutoff.isoformat()

    # NOTE(review): the filter is handed to `search` as
    # `raw_unchecked_filter`, so it is presumably sent to the backing
    # store without validation -- confirm against ORMMixin.search.
    scaleset_query = {"scaleset_id": [scaleset_id]}
    return cls.search(
        query=scaleset_query,
        raw_unchecked_filter=heartbeat_filter,
    )
|
||||||
|
|
||||||
|
|
||||||
class NodeTasks(BASE_NODE_TASK, ORMMixin):
|
class NodeTasks(BASE_NODE_TASK, ORMMixin):
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -743,6 +757,11 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
|||||||
# only add nodes that are not already set to reschedule
|
# only add nodes that are not already set to reschedule
|
||||||
to_reimage.append(node)
|
to_reimage.append(node)
|
||||||
|
|
||||||
|
dead_nodes = Node.get_dead_nodes(self.scaleset_id, NODE_EXPIRATION_TIME)
|
||||||
|
for node in dead_nodes:
|
||||||
|
node.set_halt()
|
||||||
|
to_reimage.append(node)
|
||||||
|
|
||||||
# Perform operations until they fail due to scaleset getting locked
|
# Perform operations until they fail due to scaleset getting locked
|
||||||
try:
|
try:
|
||||||
if to_delete:
|
if to_delete:
|
||||||
|
Reference in New Issue
Block a user