mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-17 12:28:07 +00:00
Report the setup failure in the task when available (#781)
This commit is contained in:
@ -125,24 +125,30 @@ def on_state_update(
|
|||||||
node_task.save()
|
node_task.save()
|
||||||
|
|
||||||
elif state == NodeState.done:
|
elif state == NodeState.done:
|
||||||
# if tasks are running on the node when it reports as Done
|
|
||||||
# those are stopped early
|
|
||||||
node.mark_tasks_stopped_early()
|
|
||||||
node.to_reimage(done=True)
|
|
||||||
|
|
||||||
# Model-validated.
|
# Model-validated.
|
||||||
#
|
#
|
||||||
# This field will be required in the future.
|
# This field will be required in the future.
|
||||||
# For now, it is optional for back compat.
|
# For now, it is optional for back compat.
|
||||||
done_data = cast(Optional[NodeDoneEventData], state_update.data)
|
done_data = cast(Optional[NodeDoneEventData], state_update.data)
|
||||||
|
error = None
|
||||||
if done_data:
|
if done_data:
|
||||||
# TODO: do something with this done data
|
|
||||||
if done_data.error:
|
if done_data.error:
|
||||||
|
error_text = done_data.json(exclude_none=True)
|
||||||
|
error = Error(
|
||||||
|
code=ErrorCode.TASK_FAILED,
|
||||||
|
errors=[error_text],
|
||||||
|
)
|
||||||
logging.error(
|
logging.error(
|
||||||
"node 'done' with error: machine_id:%s, data:%s",
|
"node 'done' with error: machine_id:%s, data:%s",
|
||||||
machine_id,
|
machine_id,
|
||||||
done_data,
|
error_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# if tasks are running on the node when it reports as Done
|
||||||
|
# those are stopped early
|
||||||
|
node.mark_tasks_stopped_early(error=error)
|
||||||
|
node.to_reimage(done=True)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -220,18 +220,19 @@ class Node(BASE_NODE, ORMMixin):
|
|||||||
self.stop()
|
self.stop()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def mark_tasks_stopped_early(self) -> None:
|
def mark_tasks_stopped_early(self, error: Optional[Error] = None) -> None:
|
||||||
from ..tasks.main import Task
|
from ..tasks.main import Task
|
||||||
|
|
||||||
|
if error is None:
|
||||||
|
error = Error(
|
||||||
|
code=ErrorCode.TASK_FAILED,
|
||||||
|
errors=["node reimaged during task execution"],
|
||||||
|
)
|
||||||
|
|
||||||
for entry in NodeTasks.get_by_machine_id(self.machine_id):
|
for entry in NodeTasks.get_by_machine_id(self.machine_id):
|
||||||
task = Task.get_by_task_id(entry.task_id)
|
task = Task.get_by_task_id(entry.task_id)
|
||||||
if isinstance(task, Task):
|
if isinstance(task, Task):
|
||||||
task.mark_failed(
|
task.mark_failed(error)
|
||||||
Error(
|
|
||||||
code=ErrorCode.TASK_FAILED,
|
|
||||||
errors=["node reimaged during task execution"],
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def could_shrink_scaleset(self) -> bool:
|
def could_shrink_scaleset(self) -> bool:
|
||||||
from .scalesets import ScalesetShrinkQueue
|
from .scalesets import ScalesetShrinkQueue
|
||||||
|
Reference in New Issue
Block a user