mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-16 11:58:09 +00:00
reimage outdated nodes (#33)
* reimage outdated nodes * import sort, version check * clear node messages on registration Co-authored-by: bmc-msft <41130664+bmc-msft@users.noreply.github.com>
This commit is contained in:
@ -115,7 +115,9 @@ impl Registration {
|
|||||||
|
|
||||||
if managed {
|
if managed {
|
||||||
let scaleset = onefuzz::machine_id::get_scaleset_name().await?;
|
let scaleset = onefuzz::machine_id::get_scaleset_name().await?;
|
||||||
url.query_pairs_mut().append_pair("scaleset_id", &scaleset);
|
url.query_pairs_mut()
|
||||||
|
.append_pair("scaleset_id", &scaleset)
|
||||||
|
.append_pair("version", env!("ONEFUZZ_VERSION"));
|
||||||
}
|
}
|
||||||
// The registration can fail because this call is made before the virtual machine scaleset is done provisioning
|
// The registration can fail because this call is made before the virtual machine scaleset is done provisioning
|
||||||
// The authentication layer of the service will reject this request when that happens
|
// The authentication layer of the service will reject this request when that happens
|
||||||
|
@ -14,7 +14,7 @@ from onefuzztypes.responses import AgentRegistration
|
|||||||
from ..onefuzzlib.agent_authorization import verify_token
|
from ..onefuzzlib.agent_authorization import verify_token
|
||||||
from ..onefuzzlib.azure.creds import get_fuzz_storage, get_instance_name
|
from ..onefuzzlib.azure.creds import get_fuzz_storage, get_instance_name
|
||||||
from ..onefuzzlib.azure.queue import get_queue_sas
|
from ..onefuzzlib.azure.queue import get_queue_sas
|
||||||
from ..onefuzzlib.pools import Node, Pool
|
from ..onefuzzlib.pools import Node, NodeMessage, Pool
|
||||||
from ..onefuzzlib.request import not_ok, ok, parse_uri
|
from ..onefuzzlib.request import not_ok, ok, parse_uri
|
||||||
|
|
||||||
|
|
||||||
@ -44,7 +44,6 @@ def get(req: func.HttpRequest) -> func.HttpResponse:
|
|||||||
if isinstance(get_registration, Error):
|
if isinstance(get_registration, Error):
|
||||||
return not_ok(get_registration, context="agent registration")
|
return not_ok(get_registration, context="agent registration")
|
||||||
|
|
||||||
# check if an existone registration exists
|
|
||||||
agent_node = Node.get_by_machine_id(get_registration.machine_id)
|
agent_node = Node.get_by_machine_id(get_registration.machine_id)
|
||||||
|
|
||||||
if agent_node is None:
|
if agent_node is None:
|
||||||
@ -79,7 +78,6 @@ def post(req: func.HttpRequest) -> func.HttpResponse:
|
|||||||
registration_request = parse_uri(AgentRegistrationPost, req)
|
registration_request = parse_uri(AgentRegistrationPost, req)
|
||||||
if isinstance(registration_request, Error):
|
if isinstance(registration_request, Error):
|
||||||
return not_ok(registration_request, context="agent registration")
|
return not_ok(registration_request, context="agent registration")
|
||||||
# check if an existone registration exists
|
|
||||||
agent_node = Node.get_by_machine_id(registration_request.machine_id)
|
agent_node = Node.get_by_machine_id(registration_request.machine_id)
|
||||||
|
|
||||||
pool = Pool.get_by_name(registration_request.pool_name)
|
pool = Pool.get_by_name(registration_request.pool_name)
|
||||||
@ -97,8 +95,13 @@ def post(req: func.HttpRequest) -> func.HttpResponse:
|
|||||||
pool_name=registration_request.pool_name,
|
pool_name=registration_request.pool_name,
|
||||||
machine_id=registration_request.machine_id,
|
machine_id=registration_request.machine_id,
|
||||||
scaleset_id=registration_request.scaleset_id,
|
scaleset_id=registration_request.scaleset_id,
|
||||||
|
version=registration_request.version
|
||||||
)
|
)
|
||||||
agent_node.save()
|
agent_node.save()
|
||||||
|
elif agent_node.version.lower != registration_request.version:
|
||||||
|
NodeMessage.clear_messages(agent_node.machine_id)
|
||||||
|
agent_node.version = registration_request.version
|
||||||
|
agent_node.save()
|
||||||
|
|
||||||
return create_registration_response(agent_node.machine_id, pool)
|
return create_registration_response(agent_node.machine_id, pool)
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@ from onefuzztypes.models import Scaleset as BASE_SCALESET
|
|||||||
from onefuzztypes.models import (
|
from onefuzztypes.models import (
|
||||||
ScalesetNodeState,
|
ScalesetNodeState,
|
||||||
ScalesetSummary,
|
ScalesetSummary,
|
||||||
|
StopNodeCommand,
|
||||||
WorkSet,
|
WorkSet,
|
||||||
WorkSetSummary,
|
WorkSetSummary,
|
||||||
WorkUnitSummary,
|
WorkUnitSummary,
|
||||||
@ -80,6 +81,28 @@ class Node(BASE_NODE, ORMMixin):
|
|||||||
query["pool_name"] = [pool_name]
|
query["pool_name"] = [pool_name]
|
||||||
return cls.search(query=query)
|
return cls.search(query=query)
|
||||||
|
|
||||||
|
@classmethod
def search_outdated(
    cls,
    *,
    scaleset_id: Optional[UUID] = None,
    states: Optional[List[NodeState]] = None,
    pool_name: Optional[str] = None,
) -> List["Node"]:
    """Search for nodes whose recorded version is not the current version.

    Nodes that have no ``version`` column at all are also returned.

    Args:
        scaleset_id: if given, restrict the search to this scaleset.
        states: if given and non-empty, restrict the search to these states.
        pool_name: if given, restrict the search to this pool.

    Returns:
        Whatever ``cls.search`` returns for the combined filters.
    """
    query: QueryFilter = {}
    if scaleset_id:
        query["scaleset_id"] = [scaleset_id]
    if states:
        query["state"] = states
    if pool_name:
        query["pool_name"] = [pool_name]

    # An Azure table query comparison always evaluates to false when the
    # column does not exist.  Negating an *equality* test therefore matches
    # both the nodes where the version is not defined and the nodes with a
    # mismatched version, while excluding nodes already on the current
    # version.  (Negating `ne` would do the opposite for versioned nodes:
    # keep the up-to-date ones and drop the mismatched ones.)
    version_query = "not (version eq '%s')" % __version__
    return cls.search(query=query, raw_unchecked_filter=version_query)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_by_machine_id(cls, machine_id: UUID) -> Optional["Node"]:
|
def get_by_machine_id(cls, machine_id: UUID) -> Optional["Node"]:
|
||||||
nodes = cls.search(query={"machine_id": [machine_id]})
|
nodes = cls.search(query={"machine_id": [machine_id]})
|
||||||
@ -132,9 +155,7 @@ class Node(BASE_NODE, ORMMixin):
|
|||||||
for node in nodes:
|
for node in nodes:
|
||||||
if node.state not in NodeState.ready_for_reset():
|
if node.state not in NodeState.ready_for_reset():
|
||||||
logging.info(
|
logging.info(
|
||||||
"stopping task %s on machine_id:%s",
|
"stopping task %s on machine_id:%s", task_id, node.machine_id,
|
||||||
task_id,
|
|
||||||
node.machine_id,
|
|
||||||
)
|
)
|
||||||
node.state = NodeState.done
|
node.state = NodeState.done
|
||||||
node.save()
|
node.save()
|
||||||
@ -203,6 +224,12 @@ class NodeMessage(ORMMixin):
|
|||||||
|
|
||||||
client.commit_batch(cls.table_name(), batch)
|
client.commit_batch(cls.table_name(), batch)
|
||||||
|
|
||||||
|
@classmethod
def clear_messages(cls, agent_id: UUID) -> None:
    """Delete every message that ``get_messages`` returns for ``agent_id``."""
    # Collect the ids first, then remove them in one delete_messages call.
    pending_ids = [entry.message_id for entry in cls.get_messages(agent_id)]
    cls.delete_messages(agent_id, pending_ids)
|
||||||
|
|
||||||
|
|
||||||
class Pool(BASE_POOL, ORMMixin):
|
class Pool(BASE_POOL, ORMMixin):
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -569,13 +596,29 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
|||||||
nodes = Node.search_states(
|
nodes = Node.search_states(
|
||||||
scaleset_id=self.scaleset_id, states=NodeState.ready_for_reset()
|
scaleset_id=self.scaleset_id, states=NodeState.ready_for_reset()
|
||||||
)
|
)
|
||||||
if not nodes:
|
|
||||||
|
outdated = Node.search_outdated(
|
||||||
|
scaleset_id=self.scaleset_id,
|
||||||
|
states=[NodeState.free],
|
||||||
|
)
|
||||||
|
|
||||||
|
if not (nodes or outdated):
|
||||||
logging.debug("scaleset node gc done (no nodes) %s", self.scaleset_id)
|
logging.debug("scaleset node gc done (no nodes) %s", self.scaleset_id)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
to_delete = []
|
to_delete = []
|
||||||
to_reimage = []
|
to_reimage = []
|
||||||
|
|
||||||
|
for node in outdated:
|
||||||
|
if node.version == "1.0.0":
|
||||||
|
to_reimage.append(node)
|
||||||
|
else:
|
||||||
|
stop_message = NodeMessage(
|
||||||
|
agent_id=node.machine_id,
|
||||||
|
message=NodeCommand(stop=StopNodeCommand()),
|
||||||
|
)
|
||||||
|
stop_message.save()
|
||||||
|
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
# delete nodes that are not waiting on the scaleset GC
|
# delete nodes that are not waiting on the scaleset GC
|
||||||
if not node.scaleset_node_exists():
|
if not node.scaleset_node_exists():
|
||||||
@ -779,8 +822,7 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
|||||||
break
|
break
|
||||||
if not node_state:
|
if not node_state:
|
||||||
node_state = ScalesetNodeState(
|
node_state = ScalesetNodeState(
|
||||||
machine_id=machine_id,
|
machine_id=machine_id, instance_id=instance_id,
|
||||||
instance_id=instance_id,
|
|
||||||
)
|
)
|
||||||
self.nodes.append(node_state)
|
self.nodes.append(node_state)
|
||||||
|
|
||||||
|
@ -82,6 +82,7 @@ class AgentRegistrationPost(BaseRequest):
|
|||||||
pool_name: PoolName
|
pool_name: PoolName
|
||||||
scaleset_id: Optional[UUID]
|
scaleset_id: Optional[UUID]
|
||||||
machine_id: UUID
|
machine_id: UUID
|
||||||
|
version: str
|
||||||
|
|
||||||
|
|
||||||
class PoolCreate(BaseRequest):
|
class PoolCreate(BaseRequest):
|
||||||
|
Reference in New Issue
Block a user