mirror of
https://github.com/microsoft/onefuzz.git
synced 2025-06-14 11:08:06 +00:00
Create autoscale resources for scaleset (#1661)
* Initial progress on adding an auto scale resource
* auto scale API is ready
* When creating a scaleset, add an autoscale resource to it as well
* Auto scale is correctly linked with scaleset
* 🧹
* Lint
* Cleaned up
This commit is contained in:
145
src/api-service/__app__/onefuzzlib/azure/auto_scale.py
Normal file
145
src/api-service/__app__/onefuzzlib/azure/auto_scale.py
Normal file
@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import timedelta
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from uuid import UUID
|
||||
|
||||
from azure.core.exceptions import ResourceNotFoundError
|
||||
from azure.mgmt.monitor.models import (
|
||||
AutoscaleProfile,
|
||||
AutoscaleSettingResource,
|
||||
ComparisonOperationType,
|
||||
MetricStatisticType,
|
||||
MetricTrigger,
|
||||
ScaleAction,
|
||||
ScaleCapacity,
|
||||
ScaleDirection,
|
||||
ScaleRule,
|
||||
ScaleType,
|
||||
TimeAggregationType,
|
||||
)
|
||||
from msrestazure.azure_exceptions import CloudError
|
||||
from onefuzztypes.enums import ErrorCode
|
||||
from onefuzztypes.models import Error
|
||||
from onefuzztypes.primitives import Region
|
||||
|
||||
from .creds import (
|
||||
get_base_region,
|
||||
get_base_resource_group,
|
||||
get_subscription,
|
||||
retry_on_auth_failure,
|
||||
)
|
||||
from .monitor import get_monitor_client
|
||||
|
||||
|
||||
@retry_on_auth_failure()
def add_auto_scale_to_vmss(
    vmss: UUID, auto_scale_profile: AutoscaleProfile
) -> Optional[Error]:
    """Attach an autoscale setting to a scaleset unless one already targets it.

    The autoscale settings of the base resource group are listed and searched
    for one whose ``target_resource_uri`` ends with the scaleset id.  When
    none is found, a new setting is created in the base region from
    ``auto_scale_profile``.

    Args:
        vmss: Id of the scaleset the autoscale setting should target.
        auto_scale_profile: Profile installed on a newly created setting.

    Returns:
        ``None`` when a matching setting already exists or a new one was
        created; an ``Error`` when the listing or the creation failed.
    """
    logging.info("Checking scaleset %s for existing auto scale resources", vmss)
    client = get_monitor_client()
    resource_group = get_base_resource_group()

    try:
        existing_settings = client.autoscale_settings.list_by_resource_group(
            resource_group
        )
        vmss_suffix = str(vmss)
        # Iterating the listing stays inside the try, exactly like the
        # original loop, so failures raised while walking it are handled too.
        already_linked = any(
            str(setting.target_resource_uri).endswith(vmss_suffix)
            for setting in existing_settings
        )
    except (ResourceNotFoundError, CloudError) as err:
        # Record the underlying cause; the returned Error only carries a
        # generic message.
        logging.error(
            "Failed to list auto scale resources for scaleset %s: %s", vmss, err
        )
        return Error(
            code=ErrorCode.INVALID_CONFIGURATION,
            errors=[
                "Failed to check if scaleset %s already has an autoscale resource"
                % vmss
            ],
        )

    if already_linked:
        logging.warning("Scaleset %s already has auto scale resource", vmss)
        return None

    resource_creation = create_auto_scale_resource_for(
        vmss, get_base_region(), auto_scale_profile
    )
    # Callers only care about failure; the created resource is not returned.
    return resource_creation if isinstance(resource_creation, Error) else None
|
||||
|
||||
|
||||
def create_auto_scale_resource_for(
    resource_id: UUID, location: Region, profile: AutoscaleProfile
) -> Union[AutoscaleSettingResource, Error]:
    """Create an autoscale setting that targets the given scaleset.

    The setting is created in the base resource group under a freshly
    generated UUID name.  Its target is the scaleset URI assembled from the
    subscription, the base resource group and ``resource_id``.

    Args:
        resource_id: Id of the scaleset the setting should target.
        location: Region passed as the setting's ``location``.
        profile: The single autoscale profile installed on the setting.

    Returns:
        The created ``AutoscaleSettingResource``, or an ``Error`` with code
        ``UNABLE_TO_CREATE`` when the service call raised
        ``ResourceNotFoundError`` or ``CloudError``.
    """
    logging.info("Creating auto scale resource for: %s", resource_id)
    client = get_monitor_client()
    resource_group = get_base_resource_group()
    subscription = get_subscription()

    scaleset_uri = (
        "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/virtualMachineScaleSets/%s"  # noqa: E501
        % (subscription, resource_group, resource_id)
    )

    params: Dict[str, Any] = {
        "location": location,
        "profiles": [profile],
        "target_resource_uri": scaleset_uri,
    }

    # Only the service call is guarded; the success path runs after it.
    try:
        auto_scale_resource = client.autoscale_settings.create_or_update(
            resource_group, str(uuid.uuid4()), params
        )
    except (ResourceNotFoundError, CloudError) as err:
        # Record the underlying cause; the returned Error does not carry it.
        logging.error(
            "Failed to create auto scale resource for %s: %s", resource_id, err
        )
        return Error(
            code=ErrorCode.UNABLE_TO_CREATE,
            errors=[
                "unable to create auto scale resource for resource: %s with profile: %s"
                % (resource_id, profile)
            ],
        )

    logging.info(
        "Successfully created auto scale resource %s for %s",
        auto_scale_resource.id,
        resource_id,
    )
    return auto_scale_resource
|
||||
|
||||
|
||||
def create_auto_scale_profile(min: int, max: int, queue_uri: str) -> AutoscaleProfile:
    """Build an autoscale profile driven by the pool queue's message count.

    The profile is named with a fresh UUID and carries one scale-out rule:
    increase the instance count by 1 (5 minute cooldown) when the
    ``ApproximateMessageCount`` metric of ``queue_uri`` is greater than 1.

    Args:
        min: Minimum capacity of the profile.
        max: Maximum capacity; also used as the default capacity.
        queue_uri: Resource URI of the queue whose metric drives the rule.

    Returns:
        The assembled ``AutoscaleProfile``.
    """
    profile_name = str(uuid.uuid4())
    capacity_bounds = ScaleCapacity(minimum=min, maximum=max, default=max)

    queue_backlog_trigger = MetricTrigger(
        metric_name="ApproximateMessageCount",
        metric_resource_uri=queue_uri,
        # Check every minute
        time_grain=timedelta(minutes=1),
        # The average amount of messages there are in the pool queue
        time_aggregation=TimeAggregationType.AVERAGE,
        statistic=MetricStatisticType.COUNT,
        # Over the past 10 minutes
        time_window=timedelta(minutes=10),
        # When there's more than 1 message in the pool queue
        operator=ComparisonOperationType.GREATER_THAN,
        threshold=1,
    )

    # Add a single instance, then wait five minutes before scaling again.
    grow_by_one = ScaleAction(
        direction=ScaleDirection.INCREASE,
        type=ScaleType.CHANGE_COUNT,
        value=1,
        cooldown=timedelta(minutes=5),
    )

    scale_out_rule = ScaleRule(
        metric_trigger=queue_backlog_trigger,
        scale_action=grow_by_one,
    )

    return AutoscaleProfile(
        name=profile_name,
        capacity=capacity_bounds,
        rules=[scale_out_rule],
    )
|
29
src/api-service/__app__/onefuzzlib/azure/log_analytics.py
Normal file
29
src/api-service/__app__/onefuzzlib/azure/log_analytics.py
Normal file
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import os
|
||||
from typing import Dict
|
||||
|
||||
from azure.mgmt.loganalytics import LogAnalyticsManagementClient
|
||||
from memoization import cached
|
||||
|
||||
from .creds import get_base_resource_group, get_identity, get_subscription
|
||||
|
||||
|
||||
@cached
def get_monitor_client() -> LogAnalyticsManagementClient:
    """Return a memoized ``LogAnalyticsManagementClient``.

    The client is built from ``get_identity()`` and ``get_subscription()``.
    """
    identity = get_identity()
    subscription = get_subscription()
    return LogAnalyticsManagementClient(identity, subscription)
|
||||
|
||||
|
||||
@cached(ttl=60)
def get_monitor_settings() -> Dict[str, str]:
    """Look up the id and shared key of the Log Analytics workspace.

    The workspace name is read from the ``ONEFUZZ_MONITOR`` environment
    variable and resolved inside the base resource group.  The result is
    memoized with ``ttl=60``.

    Returns:
        A dict with ``"id"`` (the workspace's ``customer_id``) and ``"key"``
        (its ``primary_shared_key``).

    Raises:
        KeyError: If ``ONEFUZZ_MONITOR`` is not set in the environment.
    """
    resource_group = get_base_resource_group()
    workspace_name = os.environ["ONEFUZZ_MONITOR"]
    client = get_monitor_client()

    workspace = client.workspaces.get(resource_group, workspace_name)
    customer_id = workspace.customer_id

    key_set = client.shared_keys.get_shared_keys(resource_group, workspace_name)
    return {"id": customer_id, "key": key_set.primary_shared_key}
|
@ -1,29 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import os
|
||||
from typing import Dict
|
||||
|
||||
from azure.mgmt.loganalytics import LogAnalyticsManagementClient
|
||||
from azure.mgmt.monitor import MonitorManagementClient
|
||||
from memoization import cached
|
||||
|
||||
from .creds import get_base_resource_group, get_identity, get_subscription
|
||||
from .creds import get_identity, get_subscription
|
||||
|
||||
|
||||
@cached
|
||||
def get_monitor_client() -> LogAnalyticsManagementClient:
|
||||
return LogAnalyticsManagementClient(get_identity(), get_subscription())
|
||||
|
||||
|
||||
@cached(ttl=60)
|
||||
def get_monitor_settings() -> Dict[str, str]:
|
||||
resource_group = get_base_resource_group()
|
||||
workspace_name = os.environ["ONEFUZZ_MONITOR"]
|
||||
client = get_monitor_client()
|
||||
customer_id = client.workspaces.get(resource_group, workspace_name).customer_id
|
||||
shared_key = client.shared_keys.get_shared_keys(
|
||||
resource_group, workspace_name
|
||||
).primary_shared_key
|
||||
return {"id": customer_id, "key": shared_key}
|
||||
def get_monitor_client() -> MonitorManagementClient:
|
||||
return MonitorManagementClient(get_identity(), get_subscription())
|
||||
|
@ -195,3 +195,9 @@ def queue_object(
|
||||
return True
|
||||
except ResourceNotFoundError:
|
||||
return False
|
||||
|
||||
|
||||
def get_resource_id(queue_name: QueueNameType, storage_type: StorageType) -> str:
    """Return the resource URI of a queue in the primary storage account.

    The URI is the value of ``get_primary_account(storage_type)`` followed by
    ``/services/queue/queues/<queue_name>``.
    """
    return "%s/services/queue/queues/%s" % (
        get_primary_account(storage_type),
        queue_name,
    )
|
||||
|
@ -25,7 +25,7 @@ from .azure.containers import (
|
||||
save_blob,
|
||||
)
|
||||
from .azure.creds import get_instance_id, get_instance_url
|
||||
from .azure.monitor import get_monitor_settings
|
||||
from .azure.log_analytics import get_monitor_settings
|
||||
from .azure.queue import get_queue_sas
|
||||
from .azure.storage import StorageType
|
||||
from .config import InstanceConfig
|
||||
|
@ -23,8 +23,11 @@ from onefuzztypes.primitives import PoolName, Region
|
||||
|
||||
from ..__version__ import __version__
|
||||
from ..azure.auth import build_auth
|
||||
from ..azure.auto_scale import add_auto_scale_to_vmss, create_auto_scale_profile
|
||||
from ..azure.image import get_os
|
||||
from ..azure.network import Network
|
||||
from ..azure.queue import get_resource_id
|
||||
from ..azure.storage import StorageType
|
||||
from ..azure.vmss import (
|
||||
UnableToUpdate,
|
||||
create_vmss,
|
||||
@ -242,6 +245,7 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
||||
self.set_failed(result)
|
||||
return
|
||||
else:
|
||||
# TODO: Link up auto scale resource with diagnostics
|
||||
logging.info(
|
||||
SCALESET_LOG_PREFIX + "creating scaleset scaleset_id:%s",
|
||||
self.scaleset_id,
|
||||
@ -257,6 +261,11 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
||||
SCALESET_LOG_PREFIX + "scaleset running scaleset_id:%s",
|
||||
self.scaleset_id,
|
||||
)
|
||||
auto_scaling = self.try_to_enable_auto_scaling()
|
||||
if isinstance(auto_scaling, Error):
|
||||
self.set_failed(auto_scaling)
|
||||
return
|
||||
|
||||
identity_result = self.try_set_identity(vmss)
|
||||
if identity_result:
|
||||
self.set_failed(identity_result)
|
||||
@ -823,3 +832,32 @@ class Scaleset(BASE_SCALESET, ORMMixin):
|
||||
state=self.state,
|
||||
)
|
||||
)
|
||||
|
||||
def try_to_enable_auto_scaling(self) -> Optional[Error]:
    """Create an autoscale resource for this scaleset, keyed on its pool queue.

    The scaleset's pool is looked up by name, the resource URI of the pool
    queue is resolved, and the current scaleset size is used as both the
    minimum and maximum capacity of the autoscale profile.

    Returns:
        ``None`` on success, otherwise the ``Error`` from the pool lookup,
        from the capacity lookup (``UNABLE_TO_FIND``), or from
        ``add_auto_scale_to_vmss``.
    """
    # Imported at call time, as in the original method.
    from .pools import Pool

    logging.info("Trying to add auto scaling for scaleset %s", self.scaleset_id)

    pool = Pool.get_by_name(self.pool_name)
    if isinstance(pool, Error):
        logging.error(
            "Failed to get pool by name: %s error: %s", self.pool_name, pool
        )
        return pool

    pool_queue_id = pool.get_pool_queue()
    pool_queue_uri = get_resource_id(pool_queue_id, StorageType.corpus)

    capacity = get_vmss_size(self.scaleset_id)
    if capacity is None:
        capacity_failed = Error(
            code=ErrorCode.UNABLE_TO_FIND,
            errors=["Failed to get capacity for scaleset %s" % self.scaleset_id],
        )
        logging.error(capacity_failed)
        return capacity_failed

    auto_scale_profile = create_auto_scale_profile(
        capacity, capacity, pool_queue_uri
    )
    result = add_auto_scale_to_vmss(self.scaleset_id, auto_scale_profile)

    # Report success only after the call has run and returned no Error.
    # The previous code logged this line before attempting the operation.
    # NOTE(review): a None result also covers "already has an autoscale
    # resource", for which add_auto_scale_to_vmss logs its own warning.
    if result is None:
        logging.info("Added auto scale resource to scaleset: %s", self.scaleset_id)
    return result
|
||||
|
@ -9,6 +9,7 @@ azure-keyvault-secrets~=4.3.0
|
||||
azure-mgmt-compute==24.0.1
|
||||
azure-mgmt-core==1.3.0
|
||||
azure-mgmt-loganalytics~=11.0.0
|
||||
azure-mgmt-monitor==3.0.0
|
||||
azure-mgmt-network==19.0.0
|
||||
azure-mgmt-storage~=18.0.0
|
||||
azure-mgmt-resource~=18.1.0
|
||||
|
@ -67,6 +67,7 @@
|
||||
"Storage Account Contributor": "17d1049b-9a84-46fb-8f53-869881c3d3ab",
|
||||
"Virtual Machine Contributor": "9980e02c-c2be-4d73-94e8-173b1dc7cf3c",
|
||||
"Storage Blob Data Reader": "2a2b9908-6ea1-4ae2-8e65-a410df84e7d1",
|
||||
"OneFuzz Deployment": "d4f7c2d9-6c1e-4caa-a39b-cba6d76bc647",
|
||||
"keyVaultName": "[concat('of-kv-', uniquestring(resourceGroup().id))]"
|
||||
},
|
||||
"functions": [
|
||||
@ -819,6 +820,21 @@
|
||||
"OWNER": "[parameters('owner')]"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Microsoft.Authorization/roleAssignments",
|
||||
"apiVersion": "2017-09-01",
|
||||
"name": "[guid(concat(resourceGroup().id, '-auto_scale'))]",
|
||||
"properties": {
|
||||
"roleDefinitionId": "[concat('/subscriptions/', subscription().subscriptionId, '/providers/Microsoft.Authorization/roleDefinitions/', variables('OneFuzz Deployment'))]",
|
||||
"principalId": "[reference(resourceId('Microsoft.Web/sites', parameters('name')), '2018-02-01', 'Full').identity.principalId]"
|
||||
},
|
||||
"DependsOn": [
|
||||
"[resourceId('Microsoft.Web/sites', parameters('name'))]"
|
||||
],
|
||||
"tags": {
|
||||
"OWNER": "[parameters('owner')]"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Microsoft.Authorization/roleAssignments",
|
||||
"apiVersion": "2018-07-01",
|
||||
|
Reference in New Issue
Block a user