mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-18 12:48:07 +00:00
Enable scale in protection on VMSS instances (#1647)
* draft attempt at adding scaling protection
* Service can now control scaling protection policy on VM instances
* Improve logging a bit
* draft attempt at adding scaling protection
* Service can now control scaling protection policy on VM instances
* Improve logging a bit
* Error message was missing info
* Linter
* Don't schedule work if we can't protect the node
* Last of the linter changes
This commit is contained in:
@ -18,6 +18,7 @@ from azure.mgmt.compute.models import (
|
||||
ResourceSkuRestrictionsType,
|
||||
VirtualMachineScaleSetVMInstanceIDs,
|
||||
VirtualMachineScaleSetVMInstanceRequiredIDs,
|
||||
VirtualMachineScaleSetVMProtectionPolicy,
|
||||
)
|
||||
from memoization import cached
|
||||
from msrestazure.azure_exceptions import CloudError
|
||||
@ -148,6 +149,54 @@ def get_instance_id(name: UUID, vm_id: UUID) -> Union[str, Error]:
|
||||
)
|
||||
|
||||
|
||||
@retry_on_auth_failure()
def update_scale_in_protection(
    name: UUID, vm_id: UUID, protect_from_scale_in: bool
) -> Optional[Error]:
    """Set or clear scale-in protection on one scaleset VM instance.

    Args:
        name: ID of the scaleset that holds the instance.
        vm_id: Machine ID of the node; it is resolved to a scaleset
            instance ID through ``get_instance_id``.
        protect_from_scale_in: ``True`` to protect the instance from
            scale-in, ``False`` to lift the protection.

    Returns:
        ``None`` once the update request has been submitted without an
        exception. Otherwise an ``Error``: the one handed back by
        ``get_instance_id``, ``UNABLE_TO_FIND`` when fetching the instance
        raises, or ``UNABLE_TO_UPDATE`` when submitting the update raises.
    """
    instance_id = get_instance_id(name, vm_id)
    if isinstance(instance_id, Error):
        return instance_id

    compute_client = get_compute_client()
    resource_group = get_base_resource_group()

    try:
        instance_vm = compute_client.virtual_machine_scale_set_vms.get(
            resource_group, name, instance_id
        )
    except (ResourceNotFoundError, CloudError):
        return Error(
            code=ErrorCode.UNABLE_TO_FIND,
            errors=["unable to find vm instance: %s:%s" % (name, instance_id)],
        )

    # Reuse the policy object already attached to the instance when there is
    # one, so only its protect_from_scale_in field is changed; build a fresh
    # policy only when the instance has none.
    protection_policy = instance_vm.protection_policy
    if protection_policy is None:
        protection_policy = VirtualMachineScaleSetVMProtectionPolicy(
            protect_from_scale_in=protect_from_scale_in
        )
    else:
        protection_policy.protect_from_scale_in = protect_from_scale_in
    instance_vm.protection_policy = protection_policy

    try:
        # NOTE(review): the return value of begin_update is not used, so only
        # exceptions raised while submitting the request are handled here --
        # confirm that not waiting for completion is intended.
        compute_client.virtual_machine_scale_set_vms.begin_update(
            resource_group, name, instance_id, instance_vm
        )
    except (ResourceNotFoundError, CloudError):
        return Error(
            code=ErrorCode.UNABLE_TO_UPDATE,
            errors=["unable to set protection policy on: %s:%s" % (vm_id, instance_id)],
        )

    # Lazy logging arguments, matching the Node scale-in protection helpers.
    logging.info(
        "Successfully set scale in protection on node %s to %s",
        vm_id,
        protect_from_scale_in,
    )
    return None
|
||||
|
||||
|
||||
class UnableToUpdate(Exception):
    """Exception type for a failed update.

    NOTE(review): the raise sites are not visible in this view; presumably
    raised by the scaleset update helpers in this module -- confirm.
    """
|
||||
|
||||
|
@ -28,7 +28,7 @@ from onefuzztypes.primitives import PoolName
|
||||
from pydantic import Field
|
||||
|
||||
from ..__version__ import __version__
|
||||
from ..azure.vmss import get_instance_id
|
||||
from ..azure.vmss import get_instance_id, update_scale_in_protection
|
||||
from ..events import send_event
|
||||
from ..orm import MappingIntStrAny, ORMMixin, QueryFilter
|
||||
from ..versions import is_minimum_version
|
||||
@ -390,6 +390,7 @@ class Node(BASE_NODE, ORMMixin):
|
||||
# if we're going to reimage, make sure the node doesn't pick up new work
|
||||
# too.
|
||||
self.send_stop_if_free()
|
||||
self.release_scale_in_protection()
|
||||
|
||||
self.save()
|
||||
|
||||
@ -502,6 +503,26 @@ class Node(BASE_NODE, ORMMixin):
|
||||
)
|
||||
)
|
||||
|
||||
def acquire_scale_in_protection(self) -> Optional[Error]:
    """Request scale-in protection for this node's scaleset instance.

    Nothing is attempted, and ``None`` is returned, when
    ``scaleset_node_exists()`` is false or the node has no ``scaleset_id``.
    Otherwise the result of ``update_scale_in_protection`` is returned
    as-is (``None`` or an ``Error``).
    """
    if not (self.scaleset_node_exists() and self.scaleset_id):
        return None

    logging.info("Setting scale-in protection on node %s", self.machine_id)
    result = update_scale_in_protection(
        self.scaleset_id, self.machine_id, protect_from_scale_in=True
    )
    return result
|
||||
|
||||
def release_scale_in_protection(self) -> Optional[Error]:
    """Lift scale-in protection from this node's scaleset instance.

    Nothing is attempted, and ``None`` is returned, when ``debug_keep_node``
    is set, when ``scaleset_node_exists()`` is false, or when the node has
    no ``scaleset_id``. Otherwise the result of
    ``update_scale_in_protection`` is returned as-is (``None`` or an
    ``Error``).
    """
    # The guards run in the same order as the original short-circuit chain.
    if self.debug_keep_node:
        return None
    if not self.scaleset_node_exists():
        return None
    if not self.scaleset_id:
        return None

    logging.info("Removing scale-in protection on node %s", self.machine_id)
    result = update_scale_in_protection(
        self.scaleset_id, self.machine_id, protect_from_scale_in=False
    )
    return result
|
||||
|
||||
|
||||
class NodeTasks(BASE_NODE_TASK, ORMMixin):
|
||||
@classmethod
|
||||
|
Reference in New Issue
Block a user