mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-16 20:08:09 +00:00
fix deleting nodes with expired heartbeats (#930)
This commit is contained in:
@@ -287,8 +287,8 @@ class Node(BASE_NODE, ORMMixin):
             return False
 
         if self.could_shrink_scaleset():
-            self.set_halt()
             logging.info("node scheduled to shrink. machine_id:%s", self.machine_id)
+            self.set_halt()
             return False
 
         if self.scaleset_id:
@@ -384,8 +384,9 @@ class Node(BASE_NODE, ORMMixin):
 
     def set_halt(self) -> None:
         """Tell the node to stop everything."""
-        self.set_shutdown()
-        self.stop()
+        logging.info("setting halt: %s", self.machine_id)
+        self.delete_requested = True
+        self.stop(done=True)
         self.set_state(NodeState.halt)
 
     @classmethod
@@ -374,9 +374,17 @@ class Scaleset(BASE_SCALESET, ORMMixin):
                 to_reimage.append(node)
 
         dead_nodes = Node.get_dead_nodes(self.scaleset_id, NODE_EXPIRATION_TIME)
-        for node in dead_nodes:
-            node.set_halt()
-            to_reimage.append(node)
+        if dead_nodes:
+            logging.info(
+                SCALESET_LOG_PREFIX
+                + "reimaging nodes with expired heartbeats. "
+                + "scaleset_id:%s nodes:%s",
+                self.scaleset_id,
+                ",".join(str(x.machine_id) for x in dead_nodes),
+            )
+            for node in dead_nodes:
+                if node not in to_reimage:
+                    to_reimage.append(node)
 
         # Perform operations until they fail due to scaleset getting locked
         try:
Reference in New Issue
Block a user