mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-14 19:08:08 +00:00
Create autoscale resources for scaleset (#1661)
* Initial progress toward adding an auto scale resource
* auto scale API is ready
* When creating a scaleset, add an autoscale resource to it as well
* Auto scale is correctly linked with scaleset
* 🧹
* Lint
* Cleaned up
This commit is contained in:
145
src/api-service/__app__/onefuzzlib/azure/auto_scale.py
Normal file
145
src/api-service/__app__/onefuzzlib/azure/auto_scale.py
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Copyright (c) Microsoft Corporation.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from datetime import timedelta
|
||||||
|
from typing import Any, Dict, Optional, Union
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from azure.core.exceptions import ResourceNotFoundError
|
||||||
|
from azure.mgmt.monitor.models import (
|
||||||
|
AutoscaleProfile,
|
||||||
|
AutoscaleSettingResource,
|
||||||
|
ComparisonOperationType,
|
||||||
|
MetricStatisticType,
|
||||||
|
MetricTrigger,
|
||||||
|
ScaleAction,
|
||||||
|
ScaleCapacity,
|
||||||
|
ScaleDirection,
|
||||||
|
ScaleRule,
|
||||||
|
ScaleType,
|
||||||
|
TimeAggregationType,
|
||||||
|
)
|
||||||
|
from msrestazure.azure_exceptions import CloudError
|
||||||
|
from onefuzztypes.enums import ErrorCode
|
||||||
|
from onefuzztypes.models import Error
|
||||||
|
from onefuzztypes.primitives import Region
|
||||||
|
|
||||||
|
from .creds import (
|
||||||
|
get_base_region,
|
||||||
|
get_base_resource_group,
|
||||||
|
get_subscription,
|
||||||
|
retry_on_auth_failure,
|
||||||
|
)
|
||||||
|
from .monitor import get_monitor_client
|
||||||
|
|
||||||
|
|
||||||
|
@retry_on_auth_failure()
def add_auto_scale_to_vmss(
    vmss: UUID, auto_scale_profile: AutoscaleProfile
) -> Optional[Error]:
    """Make sure the scaleset ``vmss`` has an autoscale setting attached.

    The autoscale settings of the base resource group are searched for one
    whose target resource URI ends with the scaleset id.  When one is found
    nothing is created; otherwise a new setting is created in the base region
    from ``auto_scale_profile``.

    Returns ``None`` when a setting already exists or was created, and an
    ``Error`` when either the lookup or the creation fails.
    """
    logging.info("Checking scaleset %s for existing auto scale resources" % vmss)
    monitor = get_monitor_client()
    group = get_base_resource_group()
    vmss_suffix = str(vmss)

    try:
        # Keep the search inside the try block: errors may only surface while
        # the listing is being iterated, not when it is requested.
        existing_id = next(
            (
                setting.id
                for setting in monitor.autoscale_settings.list_by_resource_group(
                    group
                )
                if str(setting.target_resource_uri).endswith(vmss_suffix)
            ),
            None,
        )
    except (ResourceNotFoundError, CloudError):
        return Error(
            code=ErrorCode.INVALID_CONFIGURATION,
            errors=[
                "Failed to check if scaleset %s already has an autoscale resource"
                % vmss
            ],
        )

    if existing_id is not None:
        logging.warning("Scaleset %s already has auto scale resource" % vmss)
        return None

    created = create_auto_scale_resource_for(
        vmss, get_base_region(), auto_scale_profile
    )
    # Only the failure is propagated; the created resource itself is dropped.
    return created if isinstance(created, Error) else None
|
||||||
|
|
||||||
|
|
||||||
|
def create_auto_scale_resource_for(
    resource_id: UUID, location: Region, profile: AutoscaleProfile
) -> Union[AutoscaleSettingResource, Error]:
    """Create an autoscale setting that targets the scaleset ``resource_id``.

    The setting is created in the base resource group under a freshly
    generated UUID name, with ``profile`` as its only profile and the
    scaleset's resource URI as its target.

    Returns the created ``AutoscaleSettingResource``, or an ``Error`` with
    ``ErrorCode.UNABLE_TO_CREATE`` when the service call raises
    ``ResourceNotFoundError`` or ``CloudError``.
    """
    logging.info("Creating auto scale resource for: %s" % resource_id)
    monitor = get_monitor_client()
    group = get_base_resource_group()

    target_uri = (
        "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/virtualMachineScaleSets/%s"  # noqa: E501
        % (get_subscription(), group, resource_id)
    )
    settings: Dict[str, Any] = dict(
        location=location,
        profiles=[profile],
        target_resource_uri=target_uri,
    )

    try:
        created = monitor.autoscale_settings.create_or_update(
            group, str(uuid.uuid4()), settings
        )
    except (ResourceNotFoundError, CloudError):
        return Error(
            code=ErrorCode.UNABLE_TO_CREATE,
            errors=[
                "unable to create auto scale resource for resource: %s with profile: %s"
                % (resource_id, profile)
            ],
        )

    logging.info(
        "Successfully created auto scale resource %s for %s"
        % (created.id, resource_id)
    )
    return created
|
||||||
|
|
||||||
|
|
||||||
|
def create_auto_scale_profile(min: int, max: int, queue_uri: str) -> AutoscaleProfile:
    """Build an autoscale profile with one scale-out rule tied to a queue.

    The profile is named with a fresh UUID.  Its capacity runs from ``min``
    to ``max`` with ``max`` as the default.  Its single rule watches the
    ``ApproximateMessageCount`` metric of ``queue_uri`` and increases the
    instance count by one when the trigger fires.
    """
    profile_name = str(uuid.uuid4())
    capacity = ScaleCapacity(minimum=min, maximum=max, default=max)

    # Trigger: sampled every minute, averaged over the past 10 minutes, fires
    # when there is more than 1 message in the pool queue.
    queue_backlog = MetricTrigger(
        metric_name="ApproximateMessageCount",
        metric_resource_uri=queue_uri,
        time_grain=timedelta(minutes=1),
        time_aggregation=TimeAggregationType.AVERAGE,
        statistic=MetricStatisticType.COUNT,
        time_window=timedelta(minutes=10),
        operator=ComparisonOperationType.GREATER_THAN,
        threshold=1,
    )

    # Action: add one instance, with a 5 minute cooldown.
    add_one_instance = ScaleAction(
        direction=ScaleDirection.INCREASE,
        type=ScaleType.CHANGE_COUNT,
        value=1,
        cooldown=timedelta(minutes=5),
    )

    scale_out = ScaleRule(metric_trigger=queue_backlog, scale_action=add_one_instance)
    return AutoscaleProfile(name=profile_name, capacity=capacity, rules=[scale_out])
|
29
src/api-service/__app__/onefuzzlib/azure/log_analytics.py
Normal file
29
src/api-service/__app__/onefuzzlib/azure/log_analytics.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Copyright (c) Microsoft Corporation.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from azure.mgmt.loganalytics import LogAnalyticsManagementClient
|
||||||
|
from memoization import cached
|
||||||
|
|
||||||
|
from .creds import get_base_resource_group, get_identity, get_subscription
|
||||||
|
|
||||||
|
|
||||||
|
@cached
def get_monitor_client() -> LogAnalyticsManagementClient:
    """Return a Log Analytics management client for the current subscription.

    The client is built from ``get_identity()`` and ``get_subscription()``.
    The function is wrapped by ``memoization.cached``, so repeated calls are
    served from that cache.
    """
    identity = get_identity()
    subscription = get_subscription()
    return LogAnalyticsManagementClient(identity, subscription)
|
||||||
|
|
||||||
|
|
||||||
|
@cached(ttl=60)
def get_monitor_settings() -> Dict[str, str]:
    """Fetch the id and primary shared key of the monitoring workspace.

    The workspace name is read from the ``ONEFUZZ_MONITOR`` environment
    variable (a ``KeyError`` is raised if it is unset) and looked up in the
    base resource group.

    Returns a dict with ``"id"`` (the workspace customer id) and ``"key"``
    (the primary shared key).  Results are cached with ``ttl=60``.
    """
    group = get_base_resource_group()
    workspace = os.environ["ONEFUZZ_MONITOR"]
    analytics = get_monitor_client()

    workspace_info = analytics.workspaces.get(group, workspace)
    customer_id = workspace_info.customer_id

    keys = analytics.shared_keys.get_shared_keys(group, workspace)
    return {"id": customer_id, "key": keys.primary_shared_key}
|
@ -1,29 +1,9 @@
|
|||||||
#!/usr/bin/env python
|
from azure.mgmt.monitor import MonitorManagementClient
|
||||||
#
|
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT License.
|
|
||||||
|
|
||||||
import os
|
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
from azure.mgmt.loganalytics import LogAnalyticsManagementClient
|
|
||||||
from memoization import cached
|
from memoization import cached
|
||||||
|
|
||||||
from .creds import get_base_resource_group, get_identity, get_subscription
|
from .creds import get_identity, get_subscription
|
||||||
|
|
||||||
|
|
||||||
@cached
|
@cached
|
||||||
def get_monitor_client() -> LogAnalyticsManagementClient:
|
def get_monitor_client() -> MonitorManagementClient:
|
||||||
return LogAnalyticsManagementClient(get_identity(), get_subscription())
|
return MonitorManagementClient(get_identity(), get_subscription())
|
||||||
|
|
||||||
|
|
||||||
@cached(ttl=60)
|
|
||||||
def get_monitor_settings() -> Dict[str, str]:
|
|
||||||
resource_group = get_base_resource_group()
|
|
||||||
workspace_name = os.environ["ONEFUZZ_MONITOR"]
|
|
||||||
client = get_monitor_client()
|
|
||||||
customer_id = client.workspaces.get(resource_group, workspace_name).customer_id
|
|
||||||
shared_key = client.shared_keys.get_shared_keys(
|
|
||||||
resource_group, workspace_name
|
|
||||||
).primary_shared_key
|
|
||||||
return {"id": customer_id, "key": shared_key}
|
|
||||||
|
@ -195,3 +195,9 @@ def queue_object(
|
|||||||
return True
|
return True
|
||||||
except ResourceNotFoundError:
|
except ResourceNotFoundError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_resource_id(queue_name: QueueNameType, storage_type: StorageType) -> str:
    """Return the resource URI of ``queue_name`` in the primary storage account.

    The URI is the primary account id for ``storage_type`` followed by
    ``/services/queue/queues/<queue_name>``.
    """
    return "%s/services/queue/queues/%s" % (
        get_primary_account(storage_type),
        queue_name,
    )
|
||||||
|
@ -25,7 +25,7 @@ from .azure.containers import (
|
|||||||
save_blob,
|
save_blob,
|
||||||
)
|
)
|
||||||
from .azure.creds import get_instance_id, get_instance_url
|
from .azure.creds import get_instance_id, get_instance_url
|
||||||
from .azure.monitor import get_monitor_settings
|
from .azure.log_analytics import get_monitor_settings
|
||||||
from .azure.queue import get_queue_sas
|
from .azure.queue import get_queue_sas
|
||||||
from .azure.storage import StorageType
|
from .azure.storage import StorageType
|
||||||
from .config import InstanceConfig
|
from .config import InstanceConfig
|
||||||
|
@ -23,8 +23,11 @@ from onefuzztypes.primitives import PoolName, Region
|
|||||||
|
|
||||||
from ..__version__ import __version__
|
from ..__version__ import __version__
|
||||||
from ..azure.auth import build_auth
|
from ..azure.auth import build_auth
|
||||||
|
from ..azure.auto_scale import add_auto_scale_to_vmss, create_auto_scale_profile
|
||||||
from ..azure.image import get_os
|
from ..azure.image import get_os
|
||||||
from ..azure.network import Network
|
from ..azure.network import Network
|
||||||
|
from ..azure.queue import get_resource_id
|
||||||
|
from ..azure.storage import StorageType
|
||||||
from ..azure.vmss import (
|
from ..azure.vmss import (
|
||||||
UnableToUpdate,
|
UnableToUpdate,
|
||||||
create_vmss,
|
create_vmss,
|
||||||
@ -242,6 +245,7 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
|||||||
self.set_failed(result)
|
self.set_failed(result)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
|
# TODO: Link up auto scale resource with diagnostics
|
||||||
logging.info(
|
logging.info(
|
||||||
SCALESET_LOG_PREFIX + "creating scaleset scaleset_id:%s",
|
SCALESET_LOG_PREFIX + "creating scaleset scaleset_id:%s",
|
||||||
self.scaleset_id,
|
self.scaleset_id,
|
||||||
@ -257,6 +261,11 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
|||||||
SCALESET_LOG_PREFIX + "scaleset running scaleset_id:%s",
|
SCALESET_LOG_PREFIX + "scaleset running scaleset_id:%s",
|
||||||
self.scaleset_id,
|
self.scaleset_id,
|
||||||
)
|
)
|
||||||
|
auto_scaling = self.try_to_enable_auto_scaling()
|
||||||
|
if isinstance(auto_scaling, Error):
|
||||||
|
self.set_failed(auto_scaling)
|
||||||
|
return
|
||||||
|
|
||||||
identity_result = self.try_set_identity(vmss)
|
identity_result = self.try_set_identity(vmss)
|
||||||
if identity_result:
|
if identity_result:
|
||||||
self.set_failed(identity_result)
|
self.set_failed(identity_result)
|
||||||
@ -823,3 +832,32 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
|||||||
state=self.state,
|
state=self.state,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def try_to_enable_auto_scaling(self) -> Optional[Error]:
    """Attach an autoscale setting to this scaleset, driven by its pool queue.

    Looks up the pool named ``self.pool_name``, resolves the resource URI of
    that pool's queue, reads the scaleset's current size and builds an
    autoscale profile whose minimum and maximum are both that size.  The
    profile is then handed to ``add_auto_scale_to_vmss``.

    Returns ``None`` on success, or an ``Error`` when the pool cannot be
    found, the scaleset size cannot be read, or the autoscale setting could
    not be added.
    """
    # Function-scope import kept from the original; presumably it avoids a
    # circular import with .pools -- confirm before moving it to module level.
    from .pools import Pool

    logging.info("Trying to add auto scaling for scaleset %s" % self.scaleset_id)

    pool = Pool.get_by_name(self.pool_name)
    if isinstance(pool, Error):
        logging.error(
            "Failed to get pool by name: %s error: %s" % (self.pool_name, pool)
        )
        return pool

    pool_queue_id = pool.get_pool_queue()
    pool_queue_uri = get_resource_id(pool_queue_id, StorageType.corpus)

    capacity = get_vmss_size(self.scaleset_id)
    if capacity is None:
        capacity_failed = Error(
            code=ErrorCode.UNABLE_TO_FIND,
            errors=["Failed to get capacity for scaleset %s" % self.scaleset_id],
        )
        logging.error(capacity_failed)
        return capacity_failed

    auto_scale_profile = create_auto_scale_profile(
        capacity, capacity, pool_queue_uri
    )

    # Report success only after the call has actually succeeded; previously
    # the message was logged before the attempt, even when it then failed.
    result = add_auto_scale_to_vmss(self.scaleset_id, auto_scale_profile)
    if isinstance(result, Error):
        return result

    logging.info("Added auto scale resource to scaleset: %s" % self.scaleset_id)
    return None
|
||||||
|
@ -9,6 +9,7 @@ azure-keyvault-secrets~=4.3.0
|
|||||||
azure-mgmt-compute==24.0.1
|
azure-mgmt-compute==24.0.1
|
||||||
azure-mgmt-core==1.3.0
|
azure-mgmt-core==1.3.0
|
||||||
azure-mgmt-loganalytics~=11.0.0
|
azure-mgmt-loganalytics~=11.0.0
|
||||||
|
azure-mgmt-monitor==3.0.0
|
||||||
azure-mgmt-network==19.0.0
|
azure-mgmt-network==19.0.0
|
||||||
azure-mgmt-storage~=18.0.0
|
azure-mgmt-storage~=18.0.0
|
||||||
azure-mgmt-resource~=18.1.0
|
azure-mgmt-resource~=18.1.0
|
||||||
|
@ -67,6 +67,7 @@
|
|||||||
"Storage Account Contributor": "17d1049b-9a84-46fb-8f53-869881c3d3ab",
|
"Storage Account Contributor": "17d1049b-9a84-46fb-8f53-869881c3d3ab",
|
||||||
"Virtual Machine Contributor": "9980e02c-c2be-4d73-94e8-173b1dc7cf3c",
|
"Virtual Machine Contributor": "9980e02c-c2be-4d73-94e8-173b1dc7cf3c",
|
||||||
"Storage Blob Data Reader": "2a2b9908-6ea1-4ae2-8e65-a410df84e7d1",
|
"Storage Blob Data Reader": "2a2b9908-6ea1-4ae2-8e65-a410df84e7d1",
|
||||||
|
"OneFuzz Deployment": "d4f7c2d9-6c1e-4caa-a39b-cba6d76bc647",
|
||||||
"keyVaultName": "[concat('of-kv-', uniquestring(resourceGroup().id))]"
|
"keyVaultName": "[concat('of-kv-', uniquestring(resourceGroup().id))]"
|
||||||
},
|
},
|
||||||
"functions": [
|
"functions": [
|
||||||
@ -819,6 +820,21 @@
|
|||||||
"OWNER": "[parameters('owner')]"
|
"OWNER": "[parameters('owner')]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"type": "Microsoft.Authorization/roleAssignments",
|
||||||
|
"apiVersion": "2017-09-01",
|
||||||
|
"name": "[guid(concat(resourceGroup().id, '-auto_scale'))]",
|
||||||
|
"properties": {
|
||||||
|
"roleDefinitionId": "[concat('/subscriptions/', subscription().subscriptionId, '/providers/Microsoft.Authorization/roleDefinitions/', variables('OneFuzz Deployment'))]",
|
||||||
|
"principalId": "[reference(resourceId('Microsoft.Web/sites', parameters('name')), '2018-02-01', 'Full').identity.principalId]"
|
||||||
|
},
|
||||||
|
"DependsOn": [
|
||||||
|
"[resourceId('Microsoft.Web/sites', parameters('name'))]"
|
||||||
|
],
|
||||||
|
"tags": {
|
||||||
|
"OWNER": "[parameters('owner')]"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "Microsoft.Authorization/roleAssignments",
|
"type": "Microsoft.Authorization/roleAssignments",
|
||||||
"apiVersion": "2018-07-01",
|
"apiVersion": "2018-07-01",
|
||||||
|
Reference in New Issue
Block a user