mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-17 04:18:07 +00:00
mark tasks as failed when the node is being reimaged due to heartbeat issues (#1015)
This commit is contained in:
@@ -391,6 +391,16 @@ class Scaleset(BASE_SCALESET, ORMMixin):
                 ",".join(str(x.machine_id) for x in dead_nodes),
             )
             for node in dead_nodes:
+                error = Error(
+                    code=ErrorCode.TASK_FAILED,
+                    errors=[
+                        "node reimaged due to expired heartbeat",
+                        f"scaleset_id:{node.scaleset_id} machine_id:{node.machine_id}",
+                        f"last heartbeat:{node.heartbeat}",
+                    ],
+                )
+                node.mark_tasks_stopped_early(error=error)
+                node.to_reimage(done=True)
                 if node not in to_reimage:
                     to_reimage.append(node)
 
Reference in New Issue
Block a user