Mirror of https://github.com/microsoft/onefuzz.git, synced 2025-06-16 20:08:09 +00:00
Colocate tasks (#402)
Enables co-locating multiple tasks in a given work-set. Tasks are bucketed by the following:

* OS
* job id
* setup container
* VM SKU & image (used in pre-1.0 style tasks)
* pool name (used in 1.0+ style tasks)
* if the task needs rebooting after the task setup script executes

Additionally, a task will end up in a unique bucket if any of the following are true:

* The task is set to run on more than one VM
* The task is missing the `task.config.colocate` flag (all tasks created prior to this functionality) or the value is False

This updates the libfuzzer template to make use of colocation. Users can specify co-locating all of the tasks *or* co-locating only the secondary tasks.
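As an illustration of the bucketing rule described above, here is a minimal sketch that mirrors the `bucket_tasks` logic in the diff below; `bucket_key` and its flattened parameters are stand-ins for the real `Task` model, not part of the actual change:

```python
from typing import Optional, Tuple
from uuid import UUID, uuid4


def bucket_key(
    os: str,                        # e.g. "linux" or "windows"
    job_id: UUID,
    setup_container: str,
    vm: Optional[Tuple[str, str]],  # (sku, image), pre-1.0 style tasks
    pool: Optional[str],            # pool name, 1.0+ style tasks
    reboot_after_setup: bool,
    colocate: Optional[bool],
    vm_count: int,
) -> Tuple:
    # A task that runs on more than one VM, or that is missing the colocate
    # flag (or has it set to False), gets a random component so it always
    # lands in a bucket of its own.
    unique: Optional[UUID] = uuid4() if vm_count > 1 or not colocate else None
    return (os, job_id, vm, pool, setup_container, reboot_after_setup, unique)
```

Two tasks from the same job with the same setup container, pool, and reboot requirement produce equal keys and can therefore share a work-set; any task matching the unique-bucket conditions gets a key no other task can equal.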
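The libfuzzer template changes themselves are not shown in this hunk. As a hedged sketch of how a user might opt in via the Python SDK (the `colocate_*` keyword names are assumptions based on the commit message, not confirmed by this diff):

```python
# Hypothetical usage sketch; the project/target/pool names are placeholders
# and the colocate_* keyword names are assumptions.
from onefuzz.api import Onefuzz

o = Onefuzz()
o.template.libfuzzer.basic(
    "my-project",
    "my-target",
    "build-1",
    "linux-pool",
    target_exe="fuzz.exe",
    colocate_all_tasks=True,          # assumed: co-locate every task in the job
    # colocate_secondary_tasks=True,  # assumed: co-locate only the secondary tasks
)
```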
@@ -4,24 +4,30 @@
 # Licensed under the MIT License.
 
 import logging
-from typing import Dict, List
-from uuid import UUID
+from typing import Dict, Generator, List, Optional, Tuple, TypeVar
+from uuid import UUID, uuid4
 
 from onefuzztypes.enums import OS, PoolState, TaskState
 from onefuzztypes.models import WorkSet, WorkUnit
+from pydantic import BaseModel
 
-from ..azure.containers import (
-    StorageType,
-    blob_exists,
-    get_container_sas_url,
-    save_blob,
-)
+from ..azure.containers import StorageType, blob_exists, get_container_sas_url
 from ..pools import Pool
 from .config import build_task_config, get_setup_container
 from .main import Task
 
 HOURS = 60 * 60
 
+# TODO: eventually, this should be tied to the pool.
+MAX_TASKS_PER_SET = 10
+
+
+A = TypeVar("A")
+
+
+def chunks(items: List[A], size: int) -> Generator[List[A], None, None]:
+    return (items[x : x + size] for x in range(0, len(items), size))
+
 
 def schedule_workset(workset: WorkSet, pool: Pool, count: int) -> bool:
     if pool.state not in PoolState.available():
@@ -39,88 +45,198 @@ def schedule_workset(workset: WorkSet, pool: Pool, count: int) -> bool:
     return True
 
 
+# TODO - Once Pydantic supports hashable models, the Tuple should be replaced
+# with a model.
+#
+# For info: https://github.com/samuelcolvin/pydantic/pull/1881
+
+
+def bucket_tasks(tasks: List[Task]) -> Dict[Tuple, List[Task]]:
+    # buckets are hashed by:
+    # OS, JOB ID, vm sku & image (if available), pool name (if available),
+    # if the setup script requires rebooting, and a 'unique' value
+    #
+    # The unique value is set based on the following conditions:
+    # * if the task is set to run on more than one VM, than we assume it can't be shared
+    # * if the task is missing the 'colocate' flag or it's set to False
+
+    buckets: Dict[Tuple, List[Task]] = {}
+
+    for task in tasks:
+        vm: Optional[Tuple[str, str]] = None
+        pool: Optional[str] = None
+        unique: Optional[UUID] = None
+
+        # check for multiple VMs for pre-1.0.0 tasks
+        if task.config.vm:
+            vm = (task.config.vm.sku, task.config.vm.image)
+            if task.config.vm.count > 1:
+                unique = uuid4()
+
+        # check for multiple VMs for 1.0.0 and later tasks
+        if task.config.pool:
+            pool = task.config.pool.pool_name
+            if task.config.pool.count > 1:
+                unique = uuid4()
+
+        if not task.config.colocate:
+            unique = uuid4()
+
+        key = (
+            task.os,
+            task.job_id,
+            vm,
+            pool,
+            get_setup_container(task.config),
+            task.config.task.reboot_after_setup,
+            unique,
+        )
+        if key not in buckets:
+            buckets[key] = []
+        buckets[key].append(task)
+
+    return buckets
+
+
+class BucketConfig(BaseModel):
+    count: int
+    reboot: bool
+    setup_url: str
+    setup_script: Optional[str]
+    pool: Pool
+
+
+def build_work_unit(task: Task) -> Optional[Tuple[BucketConfig, WorkUnit]]:
+    pool = task.get_pool()
+    if not pool:
+        logging.info("unable to find pool for task: %s", task.task_id)
+        return None
+
+    logging.info("scheduling task: %s", task.task_id)
+
+    task_config = build_task_config(task.job_id, task.task_id, task.config)
+
+    setup_container = get_setup_container(task.config)
+    setup_url = get_container_sas_url(
+        setup_container, StorageType.corpus, read=True, list=True
+    )
+
+    setup_script = None
+
+    if task.os == OS.windows and blob_exists(
+        setup_container, "setup.ps1", StorageType.corpus
+    ):
+        setup_script = "setup.ps1"
+    if task.os == OS.linux and blob_exists(
+        setup_container, "setup.sh", StorageType.corpus
+    ):
+        setup_script = "setup.sh"
+
+    reboot = False
+    count = 1
+    if task.config.pool:
+        count = task.config.pool.count
+
+        # NOTE: "is True" is required to handle Optional[bool]
+        reboot = task.config.task.reboot_after_setup is True
+    elif task.config.vm:
+        # this branch should go away when we stop letting people specify
+        # VM configs directly.
+        count = task.config.vm.count
+
+        # NOTE: "is True" is required to handle Optional[bool]
+        reboot = (
+            task.config.vm.reboot_after_setup is True
+            or task.config.task.reboot_after_setup is True
+        )
+    else:
+        raise TypeError
+
+    work_unit = WorkUnit(
+        job_id=task_config.job_id,
+        task_id=task_config.task_id,
+        task_type=task_config.task_type,
+        config=task_config.json(),
+    )
+
+    bucket_config = BucketConfig(
+        pool=pool,
+        count=count,
+        reboot=reboot,
+        setup_script=setup_script,
+        setup_url=setup_url,
+    )
+
+    return bucket_config, work_unit
+
+
+def build_work_set(tasks: List[Task]) -> Optional[Tuple[BucketConfig, WorkSet]]:
+    task_ids = [x.task_id for x in tasks]
+
+    bucket_config: Optional[BucketConfig] = None
+    work_units = []
+
+    for task in tasks:
+        if task.config.prereq_tasks:
+            # if all of the prereqs are in this bucket, they will be
+            # scheduled together
+            if not all([task_id in task_ids for task_id in task.config.prereq_tasks]):
+                if not task.check_prereq_tasks():
+                    continue
+
+        result = build_work_unit(task)
+        if not result:
+            continue
+
+        new_bucket_config, work_unit = result
+        if bucket_config is None:
+            bucket_config = new_bucket_config
+        else:
+            if bucket_config != new_bucket_config:
+                raise Exception(
+                    f"bucket configs differ: {bucket_config} VS {new_bucket_config}"
+                )
+
+        work_units.append(work_unit)
+
+    if bucket_config:
+        work_set = WorkSet(
+            reboot=bucket_config.reboot,
+            script=(bucket_config.setup_script is not None),
+            setup_url=bucket_config.setup_url,
+            work_units=work_units,
+        )
+        return (bucket_config, work_set)
+
+    return None
+
+
 def schedule_tasks() -> None:
-    to_schedule: Dict[UUID, List[Task]] = {}
+    tasks: List[Task] = []
+
+    tasks = Task.search_states(states=[TaskState.waiting])
+
+    tasks_by_id = {x.task_id: x for x in tasks}
+    seen = set()
 
     not_ready_count = 0
 
-    for task in Task.search_states(states=[TaskState.waiting]):
-        if not task.ready_to_schedule():
-            not_ready_count += 1
-            continue
+    buckets = bucket_tasks(tasks)
 
-        if task.job_id not in to_schedule:
-            to_schedule[task.job_id] = []
-        to_schedule[task.job_id].append(task)
-
-    if not to_schedule and not_ready_count > 0:
-        logging.info("tasks not ready: %d", not_ready_count)
-
-    for tasks in to_schedule.values():
-        # TODO: for now, we're only scheduling one task per VM.
-
-        for task in tasks:
-            logging.info("scheduling task: %s", task.task_id)
-            agent_config = build_task_config(task.job_id, task.task_id, task.config)
-
-            setup_container = get_setup_container(task.config)
-            setup_url = get_container_sas_url(
-                setup_container, StorageType.corpus, read=True, list=True
-            )
-
-            setup_script = None
-
-            if task.os == OS.windows and blob_exists(
-                setup_container, "setup.ps1", StorageType.corpus
-            ):
-                setup_script = "setup.ps1"
-            if task.os == OS.linux and blob_exists(
-                setup_container, "setup.sh", StorageType.corpus
-            ):
-                setup_script = "setup.sh"
-
-            save_blob(
-                "task-configs",
-                "%s/config.json" % task.task_id,
-                agent_config.json(exclude_none=True),
-                StorageType.config,
-            )
-            reboot = False
-            count = 1
-            if task.config.pool:
-                count = task.config.pool.count
-                reboot = task.config.task.reboot_after_setup is True
-            elif task.config.vm:
-                # this branch should go away when we stop letting people specify
-                # VM configs directly.
-                count = task.config.vm.count
-                reboot = (
-                    task.config.vm.reboot_after_setup is True
-                    or task.config.task.reboot_after_setup is True
-                )
-
-            task_config = agent_config
-            task_config_json = task_config.json()
-            work_unit = WorkUnit(
-                job_id=task_config.job_id,
-                task_id=task_config.task_id,
-                task_type=task_config.task_type,
-                config=task_config_json,
-            )
-
-            # For now, only offer singleton work sets.
-            workset = WorkSet(
-                reboot=reboot,
-                script=(setup_script is not None),
-                setup_url=setup_url,
-                work_units=[work_unit],
-            )
-
-            pool = task.get_pool()
-            if not pool:
-                logging.info("unable to find pool for task: %s", task.task_id)
+    for bucketed_tasks in buckets.values():
+        for chunk in chunks(bucketed_tasks, MAX_TASKS_PER_SET):
+            result = build_work_set(chunk)
+            if result is None:
+                continue
+            bucket_config, work_set = result
 
-            if schedule_workset(workset, pool, count):
-                task.state = TaskState.scheduled
-                task.save()
+            if schedule_workset(work_set, bucket_config.pool, bucket_config.count):
+                for work_unit in work_set.work_units:
+                    task = tasks_by_id[work_unit.task_id]
+                    task.state = TaskState.scheduled
+                    task.save()
+                    seen.add(task.task_id)
+
+    not_ready_count = len(tasks) - len(seen)
+    if not_ready_count > 0:
+        logging.info("tasks not ready: %d", not_ready_count)