only record failures generated prior to stopping (#83)

This commit is contained in:
bmc-msft
2020-10-02 01:31:51 -04:00
committed by GitHub
parent a529f073a8
commit a196716e12
4 changed files with 29 additions and 16 deletions

View File

@ -145,13 +145,15 @@ def on_worker_event(machine_id: UUID, event: WorkerEvent) -> func.HttpResponse:
if not exit_status.success: if not exit_status.success:
logging.error("task failed: status = %s", exit_status) logging.error("task failed: status = %s", exit_status)
task.error = Error( task.mark_failed(
code=ErrorCode.TASK_FAILED, Error(
errors=[ code=ErrorCode.TASK_FAILED,
"task failed. exit_status = %s" % exit_status, errors=[
event.done.stdout, "task failed. exit_status = %s" % exit_status,
event.done.stderr, event.done.stdout,
], event.done.stderr,
],
)
) )
task.state = TaskState.stopping task.state = TaskState.stopping

View File

@ -24,7 +24,7 @@ from azure.devops.v6_0.work_item_tracking.work_item_tracking_client import (
WorkItemTrackingClient, WorkItemTrackingClient,
) )
from memoization import cached from memoization import cached
from onefuzztypes.enums import ErrorCode, TaskState from onefuzztypes.enums import ErrorCode
from onefuzztypes.models import ADOTemplate, Error, Report from onefuzztypes.models import ADOTemplate, Error, Report
from ..tasks.main import Task from ..tasks.main import Task
@ -211,9 +211,9 @@ def fail_task(report: Report, error: Exception) -> None:
task = Task.get(report.job_id, report.task_id) task = Task.get(report.job_id, report.task_id)
if task: if task:
task.error = Error(code=ErrorCode.NOTIFICATION_FAILURE, errors=[str(error)]) task.mark_failed(
task.state = TaskState.stopping Error(code=ErrorCode.NOTIFICATION_FAILURE, errors=[str(error)])
task.save() )
def notify_ado( def notify_ado(

View File

@ -740,12 +740,12 @@ class Scaleset(BASE_SCALESET, ORMMixin):
if task.state in [TaskState.stopping, TaskState.stopped]: if task.state in [TaskState.stopping, TaskState.stopped]:
continue continue
task.error = Error( task.mark_failed(
code=ErrorCode.TASK_FAILED, Error(
errors=["node reimaged during task execution"], code=ErrorCode.TASK_FAILED,
errors=["node reimaged during task execution"],
)
) )
task.state = TaskState.stopping
task.save()
entry.delete() entry.delete()
if self.state == ScalesetState.shutdown: if self.state == ScalesetState.shutdown:

View File

@ -154,6 +154,17 @@ class Task(BASE_TASK, ORMMixin):
task = tasks[0] task = tasks[0]
return task return task
def mark_failed(self, error: Error) -> None:
    """Attach ``error`` to this task and move it to ``TaskState.stopping``.

    A task whose state is already ``stopped`` or ``stopping`` is left
    untouched: the call only emits a debug log entry, so the first failure
    recorded before shutdown is not replaced by later ones.

    Args:
        error: The failure to store on the task.
    """
    already_halting = self.state in (TaskState.stopped, TaskState.stopping)

    if not already_halting:
        # First failure seen for this task: store it, request the stop and
        # call save() so the change is written out.
        self.error = error
        self.state = TaskState.stopping
        self.save()
    else:
        logging.debug(
            "ignoring post-task stop failures for %s:%s", self.job_id, self.task_id
        )
def get_pool(self) -> Optional[Pool]: def get_pool(self) -> Optional[Pool]:
if self.config.pool: if self.config.pool:
pool = Pool.get_by_name(self.config.pool.pool_name) pool = Pool.get_by_name(self.config.pool.pool_name)