mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-17 12:28:07 +00:00
add node & task heartbeat events (#621)
This adds node & task heartbeats and makes the event data available as structured data in the logs.
This commit is contained in:
@ -29,6 +29,7 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
* [job_stopped](#job_stopped)
|
||||
* [node_created](#node_created)
|
||||
* [node_deleted](#node_deleted)
|
||||
* [node_heartbeat](#node_heartbeat)
|
||||
* [node_state_updated](#node_state_updated)
|
||||
* [ping](#ping)
|
||||
* [pool_created](#pool_created)
|
||||
@ -41,6 +42,7 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
* [scaleset_failed](#scaleset_failed)
|
||||
* [task_created](#task_created)
|
||||
* [task_failed](#task_failed)
|
||||
* [task_heartbeat](#task_heartbeat)
|
||||
* [task_state_updated](#task_state_updated)
|
||||
* [task_stopped](#task_stopped)
|
||||
|
||||
@ -506,6 +508,47 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
}
|
||||
```
|
||||
|
||||
### node_heartbeat
|
||||
|
||||
#### Example
|
||||
|
||||
```json
|
||||
{
|
||||
"machine_id": "00000000-0000-0000-0000-000000000000",
|
||||
"pool_name": "example"
|
||||
}
|
||||
```
|
||||
|
||||
#### Schema
|
||||
|
||||
```json
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"machine_id": {
|
||||
"format": "uuid",
|
||||
"title": "Machine Id",
|
||||
"type": "string"
|
||||
},
|
||||
"pool_name": {
|
||||
"title": "Pool Name",
|
||||
"type": "string"
|
||||
},
|
||||
"scaleset_id": {
|
||||
"format": "uuid",
|
||||
"title": "Scaleset Id",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"machine_id",
|
||||
"pool_name"
|
||||
],
|
||||
"title": "EventNodeHeartbeat",
|
||||
"type": "object"
|
||||
}
|
||||
```
|
||||
|
||||
### node_state_updated
|
||||
|
||||
#### Example
|
||||
@ -1982,6 +2025,404 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
}
|
||||
```
|
||||
|
||||
### task_heartbeat
|
||||
|
||||
#### Example
|
||||
|
||||
```json
|
||||
{
|
||||
"config": {
|
||||
"containers": [
|
||||
{
|
||||
"name": "my-setup",
|
||||
"type": "setup"
|
||||
},
|
||||
{
|
||||
"name": "my-inputs",
|
||||
"type": "inputs"
|
||||
},
|
||||
{
|
||||
"name": "my-crashes",
|
||||
"type": "crashes"
|
||||
}
|
||||
],
|
||||
"job_id": "00000000-0000-0000-0000-000000000000",
|
||||
"tags": {},
|
||||
"task": {
|
||||
"check_debugger": true,
|
||||
"duration": 1,
|
||||
"target_env": {},
|
||||
"target_exe": "fuzz.exe",
|
||||
"target_options": [],
|
||||
"type": "libfuzzer_fuzz"
|
||||
}
|
||||
},
|
||||
"job_id": "00000000-0000-0000-0000-000000000000",
|
||||
"task_id": "00000000-0000-0000-0000-000000000000"
|
||||
}
|
||||
```
|
||||
|
||||
#### Schema
|
||||
|
||||
```json
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"definitions": {
|
||||
"ContainerType": {
|
||||
"description": "An enumeration.",
|
||||
"enum": [
|
||||
"analysis",
|
||||
"coverage",
|
||||
"crashes",
|
||||
"inputs",
|
||||
"no_repro",
|
||||
"readonly_inputs",
|
||||
"reports",
|
||||
"setup",
|
||||
"tools",
|
||||
"unique_inputs",
|
||||
"unique_reports"
|
||||
],
|
||||
"title": "ContainerType"
|
||||
},
|
||||
"StatsFormat": {
|
||||
"description": "An enumeration.",
|
||||
"enum": [
|
||||
"AFL"
|
||||
],
|
||||
"title": "StatsFormat"
|
||||
},
|
||||
"TaskConfig": {
|
||||
"properties": {
|
||||
"colocate": {
|
||||
"title": "Colocate",
|
||||
"type": "boolean"
|
||||
},
|
||||
"containers": {
|
||||
"items": {
|
||||
"$ref": "#/definitions/TaskContainers"
|
||||
},
|
||||
"title": "Containers",
|
||||
"type": "array"
|
||||
},
|
||||
"debug": {
|
||||
"items": {
|
||||
"$ref": "#/definitions/TaskDebugFlag"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"job_id": {
|
||||
"format": "uuid",
|
||||
"title": "Job Id",
|
||||
"type": "string"
|
||||
},
|
||||
"pool": {
|
||||
"$ref": "#/definitions/TaskPool"
|
||||
},
|
||||
"prereq_tasks": {
|
||||
"items": {
|
||||
"format": "uuid",
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Prereq Tasks",
|
||||
"type": "array"
|
||||
},
|
||||
"tags": {
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Tags",
|
||||
"type": "object"
|
||||
},
|
||||
"task": {
|
||||
"$ref": "#/definitions/TaskDetails"
|
||||
},
|
||||
"vm": {
|
||||
"$ref": "#/definitions/TaskVm"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"job_id",
|
||||
"task",
|
||||
"containers",
|
||||
"tags"
|
||||
],
|
||||
"title": "TaskConfig",
|
||||
"type": "object"
|
||||
},
|
||||
"TaskContainers": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"title": "Name",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"$ref": "#/definitions/ContainerType"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"name"
|
||||
],
|
||||
"title": "TaskContainers",
|
||||
"type": "object"
|
||||
},
|
||||
"TaskDebugFlag": {
|
||||
"description": "An enumeration.",
|
||||
"enum": [
|
||||
"keep_node_on_failure",
|
||||
"keep_node_on_completion"
|
||||
],
|
||||
"title": "TaskDebugFlag"
|
||||
},
|
||||
"TaskDetails": {
|
||||
"properties": {
|
||||
"analyzer_env": {
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Analyzer Env",
|
||||
"type": "object"
|
||||
},
|
||||
"analyzer_exe": {
|
||||
"title": "Analyzer Exe",
|
||||
"type": "string"
|
||||
},
|
||||
"analyzer_options": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Analyzer Options",
|
||||
"type": "array"
|
||||
},
|
||||
"check_asan_log": {
|
||||
"title": "Check Asan Log",
|
||||
"type": "boolean"
|
||||
},
|
||||
"check_debugger": {
|
||||
"default": true,
|
||||
"title": "Check Debugger",
|
||||
"type": "boolean"
|
||||
},
|
||||
"check_fuzzer_help": {
|
||||
"title": "Check Fuzzer Help",
|
||||
"type": "boolean"
|
||||
},
|
||||
"check_retry_count": {
|
||||
"title": "Check Retry Count",
|
||||
"type": "integer"
|
||||
},
|
||||
"duration": {
|
||||
"title": "Duration",
|
||||
"type": "integer"
|
||||
},
|
||||
"ensemble_sync_delay": {
|
||||
"title": "Ensemble Sync Delay",
|
||||
"type": "integer"
|
||||
},
|
||||
"expect_crash_on_failure": {
|
||||
"title": "Expect Crash On Failure",
|
||||
"type": "boolean"
|
||||
},
|
||||
"generator_env": {
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Generator Env",
|
||||
"type": "object"
|
||||
},
|
||||
"generator_exe": {
|
||||
"title": "Generator Exe",
|
||||
"type": "string"
|
||||
},
|
||||
"generator_options": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Generator Options",
|
||||
"type": "array"
|
||||
},
|
||||
"preserve_existing_outputs": {
|
||||
"title": "Preserve Existing Outputs",
|
||||
"type": "boolean"
|
||||
},
|
||||
"reboot_after_setup": {
|
||||
"title": "Reboot After Setup",
|
||||
"type": "boolean"
|
||||
},
|
||||
"rename_output": {
|
||||
"title": "Rename Output",
|
||||
"type": "boolean"
|
||||
},
|
||||
"stats_file": {
|
||||
"title": "Stats File",
|
||||
"type": "string"
|
||||
},
|
||||
"stats_format": {
|
||||
"$ref": "#/definitions/StatsFormat"
|
||||
},
|
||||
"supervisor_env": {
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Supervisor Env",
|
||||
"type": "object"
|
||||
},
|
||||
"supervisor_exe": {
|
||||
"title": "Supervisor Exe",
|
||||
"type": "string"
|
||||
},
|
||||
"supervisor_input_marker": {
|
||||
"title": "Supervisor Input Marker",
|
||||
"type": "string"
|
||||
},
|
||||
"supervisor_options": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Supervisor Options",
|
||||
"type": "array"
|
||||
},
|
||||
"target_env": {
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Target Env",
|
||||
"type": "object"
|
||||
},
|
||||
"target_exe": {
|
||||
"title": "Target Exe",
|
||||
"type": "string"
|
||||
},
|
||||
"target_options": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"title": "Target Options",
|
||||
"type": "array"
|
||||
},
|
||||
"target_options_merge": {
|
||||
"title": "Target Options Merge",
|
||||
"type": "boolean"
|
||||
},
|
||||
"target_timeout": {
|
||||
"title": "Target Timeout",
|
||||
"type": "integer"
|
||||
},
|
||||
"target_workers": {
|
||||
"title": "Target Workers",
|
||||
"type": "integer"
|
||||
},
|
||||
"type": {
|
||||
"$ref": "#/definitions/TaskType"
|
||||
},
|
||||
"wait_for_files": {
|
||||
"$ref": "#/definitions/ContainerType"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"duration"
|
||||
],
|
||||
"title": "TaskDetails",
|
||||
"type": "object"
|
||||
},
|
||||
"TaskPool": {
|
||||
"properties": {
|
||||
"count": {
|
||||
"title": "Count",
|
||||
"type": "integer"
|
||||
},
|
||||
"pool_name": {
|
||||
"title": "Pool Name",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"count",
|
||||
"pool_name"
|
||||
],
|
||||
"title": "TaskPool",
|
||||
"type": "object"
|
||||
},
|
||||
"TaskType": {
|
||||
"description": "An enumeration.",
|
||||
"enum": [
|
||||
"libfuzzer_fuzz",
|
||||
"libfuzzer_coverage",
|
||||
"libfuzzer_crash_report",
|
||||
"libfuzzer_merge",
|
||||
"generic_analysis",
|
||||
"generic_supervisor",
|
||||
"generic_merge",
|
||||
"generic_generator",
|
||||
"generic_crash_report"
|
||||
],
|
||||
"title": "TaskType"
|
||||
},
|
||||
"TaskVm": {
|
||||
"properties": {
|
||||
"count": {
|
||||
"default": 1,
|
||||
"title": "Count",
|
||||
"type": "integer"
|
||||
},
|
||||
"image": {
|
||||
"title": "Image",
|
||||
"type": "string"
|
||||
},
|
||||
"reboot_after_setup": {
|
||||
"title": "Reboot After Setup",
|
||||
"type": "boolean"
|
||||
},
|
||||
"region": {
|
||||
"title": "Region",
|
||||
"type": "string"
|
||||
},
|
||||
"sku": {
|
||||
"title": "Sku",
|
||||
"type": "string"
|
||||
},
|
||||
"spot_instances": {
|
||||
"default": false,
|
||||
"title": "Spot Instances",
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"region",
|
||||
"sku",
|
||||
"image"
|
||||
],
|
||||
"title": "TaskVm",
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"config": {
|
||||
"$ref": "#/definitions/TaskConfig"
|
||||
},
|
||||
"job_id": {
|
||||
"format": "uuid",
|
||||
"title": "Job Id",
|
||||
"type": "string"
|
||||
},
|
||||
"task_id": {
|
||||
"format": "uuid",
|
||||
"title": "Task Id",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"job_id",
|
||||
"task_id",
|
||||
"config"
|
||||
],
|
||||
"title": "EventTaskHeartbeat",
|
||||
"type": "object"
|
||||
}
|
||||
```
|
||||
|
||||
### task_state_updated
|
||||
|
||||
#### Example
|
||||
@ -3108,6 +3549,31 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
"title": "EventNodeDeleted",
|
||||
"type": "object"
|
||||
},
|
||||
"EventNodeHeartbeat": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"machine_id": {
|
||||
"format": "uuid",
|
||||
"title": "Machine Id",
|
||||
"type": "string"
|
||||
},
|
||||
"pool_name": {
|
||||
"title": "Pool Name",
|
||||
"type": "string"
|
||||
},
|
||||
"scaleset_id": {
|
||||
"format": "uuid",
|
||||
"title": "Scaleset Id",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"machine_id",
|
||||
"pool_name"
|
||||
],
|
||||
"title": "EventNodeHeartbeat",
|
||||
"type": "object"
|
||||
},
|
||||
"EventNodeStateUpdated": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
@ -3385,6 +3851,31 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
"title": "EventTaskFailed",
|
||||
"type": "object"
|
||||
},
|
||||
"EventTaskHeartbeat": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"config": {
|
||||
"$ref": "#/definitions/TaskConfig"
|
||||
},
|
||||
"job_id": {
|
||||
"format": "uuid",
|
||||
"title": "Job Id",
|
||||
"type": "string"
|
||||
},
|
||||
"task_id": {
|
||||
"format": "uuid",
|
||||
"title": "Task Id",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"job_id",
|
||||
"task_id",
|
||||
"config"
|
||||
],
|
||||
"title": "EventTaskHeartbeat",
|
||||
"type": "object"
|
||||
},
|
||||
"EventTaskStateUpdated": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
@ -3469,7 +3960,9 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
"task_state_updated",
|
||||
"task_stopped",
|
||||
"crash_reported",
|
||||
"file_added"
|
||||
"file_added",
|
||||
"task_heartbeat",
|
||||
"node_heartbeat"
|
||||
],
|
||||
"title": "EventType"
|
||||
},
|
||||
@ -3962,6 +4455,9 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
{
|
||||
"$ref": "#/definitions/EventNodeDeleted"
|
||||
},
|
||||
{
|
||||
"$ref": "#/definitions/EventNodeHeartbeat"
|
||||
},
|
||||
{
|
||||
"$ref": "#/definitions/EventPing"
|
||||
},
|
||||
@ -4001,6 +4497,9 @@ Each event will be submitted via HTTP POST to the user provided URL.
|
||||
{
|
||||
"$ref": "#/definitions/EventTaskStopped"
|
||||
},
|
||||
{
|
||||
"$ref": "#/definitions/EventTaskHeartbeat"
|
||||
},
|
||||
{
|
||||
"$ref": "#/definitions/EventCrashReported"
|
||||
},
|
||||
|
@ -8,10 +8,11 @@ import json
|
||||
import logging
|
||||
|
||||
import azure.functions as func
|
||||
from onefuzztypes.events import EventNodeHeartbeat
|
||||
from onefuzztypes.models import NodeHeartbeatEntry
|
||||
from pydantic import ValidationError
|
||||
|
||||
from ..onefuzzlib.events import get_events
|
||||
from ..onefuzzlib.events import get_events, send_event
|
||||
from ..onefuzzlib.workers.nodes import Node
|
||||
|
||||
|
||||
@ -27,6 +28,13 @@ def main(msg: func.QueueMessage, dashboard: func.Out[str]) -> None:
|
||||
return
|
||||
node.heartbeat = datetime.datetime.utcnow()
|
||||
node.save()
|
||||
send_event(
|
||||
EventNodeHeartbeat(
|
||||
machine_id=node.machine_id,
|
||||
scaleset_id=node.scaleset_id,
|
||||
pool_name=node.pool_name,
|
||||
)
|
||||
)
|
||||
except ValidationError:
|
||||
logging.error("invalid node heartbeat: %s", raw)
|
||||
|
||||
|
@ -8,10 +8,11 @@ import logging
|
||||
from datetime import datetime
|
||||
|
||||
import azure.functions as func
|
||||
from onefuzztypes.events import EventTaskHeartbeat
|
||||
from onefuzztypes.models import Error, TaskHeartbeatEntry
|
||||
from pydantic import ValidationError
|
||||
|
||||
from ..onefuzzlib.events import get_events
|
||||
from ..onefuzzlib.events import get_events, send_event
|
||||
from ..onefuzzlib.tasks.main import Task
|
||||
|
||||
|
||||
@ -29,6 +30,11 @@ def main(msg: func.QueueMessage, dashboard: func.Out[str]) -> None:
|
||||
if task:
|
||||
task.heartbeat = datetime.utcnow()
|
||||
task.save()
|
||||
send_event(
|
||||
EventTaskHeartbeat(
|
||||
job_id=task.job_id, task_id=task.task_id, config=task.config
|
||||
)
|
||||
)
|
||||
except ValidationError:
|
||||
logging.error("invalid task heartbeat: %s", raw)
|
||||
|
||||
|
@ -47,6 +47,8 @@ from onefuzztypes.events import (
|
||||
EventNodeStateUpdated,
|
||||
EventNodeCreated,
|
||||
EventNodeDeleted,
|
||||
EventNodeHeartbeat,
|
||||
EventTaskHeartbeat,
|
||||
get_event_type,
|
||||
EventType,
|
||||
)
|
||||
@ -194,6 +196,8 @@ def main():
|
||||
),
|
||||
),
|
||||
EventFileAdded(container=Container("container-name"), filename="example.txt"),
|
||||
EventNodeHeartbeat(machine_id=UUID(int=0), pool_name="example"),
|
||||
EventTaskHeartbeat(task_id=UUID(int=0), job_id=UUID(int=0), config=task_config),
|
||||
]
|
||||
|
||||
for event in Event.__args__:
|
||||
|
@ -63,6 +63,12 @@ class EventTaskStateUpdated(BaseEvent):
|
||||
config: TaskConfig
|
||||
|
||||
|
||||
class EventTaskHeartbeat(BaseEvent):
|
||||
job_id: UUID
|
||||
task_id: UUID
|
||||
config: TaskConfig
|
||||
|
||||
|
||||
class EventPing(BaseResponse):
|
||||
ping_id: UUID
|
||||
|
||||
@ -118,6 +124,12 @@ class EventNodeCreated(BaseEvent):
|
||||
pool_name: PoolName
|
||||
|
||||
|
||||
class EventNodeHeartbeat(BaseEvent):
|
||||
machine_id: UUID
|
||||
scaleset_id: Optional[UUID]
|
||||
pool_name: PoolName
|
||||
|
||||
|
||||
class EventNodeDeleted(BaseEvent):
|
||||
machine_id: UUID
|
||||
scaleset_id: Optional[UUID]
|
||||
@ -148,6 +160,7 @@ Event = Union[
|
||||
EventNodeStateUpdated,
|
||||
EventNodeCreated,
|
||||
EventNodeDeleted,
|
||||
EventNodeHeartbeat,
|
||||
EventPing,
|
||||
EventPoolCreated,
|
||||
EventPoolDeleted,
|
||||
@ -161,6 +174,7 @@ Event = Union[
|
||||
EventTaskStateUpdated,
|
||||
EventTaskCreated,
|
||||
EventTaskStopped,
|
||||
EventTaskHeartbeat,
|
||||
EventCrashReported,
|
||||
EventFileAdded,
|
||||
]
|
||||
@ -187,6 +201,8 @@ class EventType(Enum):
|
||||
task_stopped = "task_stopped"
|
||||
crash_reported = "crash_reported"
|
||||
file_added = "file_added"
|
||||
task_heartbeat = "task_heartbeat"
|
||||
node_heartbeat = "node_heartbeat"
|
||||
|
||||
|
||||
EventTypeMap = {
|
||||
@ -195,6 +211,7 @@ EventTypeMap = {
|
||||
EventType.node_created: EventNodeCreated,
|
||||
EventType.node_deleted: EventNodeDeleted,
|
||||
EventType.node_state_updated: EventNodeStateUpdated,
|
||||
EventType.node_heartbeat: EventNodeHeartbeat,
|
||||
EventType.ping: EventPing,
|
||||
EventType.pool_created: EventPoolCreated,
|
||||
EventType.pool_deleted: EventPoolDeleted,
|
||||
@ -207,6 +224,7 @@ EventTypeMap = {
|
||||
EventType.task_created: EventTaskCreated,
|
||||
EventType.task_failed: EventTaskFailed,
|
||||
EventType.task_state_updated: EventTaskStateUpdated,
|
||||
EventType.task_heartbeat: EventTaskHeartbeat,
|
||||
EventType.task_stopped: EventTaskStopped,
|
||||
EventType.crash_reported: EventCrashReported,
|
||||
EventType.file_added: EventFileAdded,
|
||||
|
Reference in New Issue
Block a user