support multiple corpus accounts (#334)

Add support for sharding across multiple storage accounts for blob containers used for corpus management.

Things to note:

1. Additional storage accounts must be in the same resource group, support the "blob" endpoint, and have the tag `storage_type` with the value `corpus`.  A utility is provided (`src/utils/add-corpus-storage-accounts`), which adds storage accounts. 
2. If any secondary storage accounts exist, they are used by default for containers.
3. Storage account names are cached in memory the Azure Function instance forever.   Upon adding new storage accounts, the app needs to be restarted to pick up the new accounts.
This commit is contained in:
bmc-msft
2021-01-06 18:11:39 -05:00
committed by GitHub
parent f345bd239d
commit 3b26ffef65
29 changed files with 496 additions and 179 deletions

View File

@ -3,9 +3,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import logging
import os
from typing import Any, List, Optional, Tuple
from typing import Any, List
from uuid import UUID
from azure.cli.core import CLIError
@ -13,12 +12,11 @@ from azure.common.client_factory import get_client_from_cli_profile
from azure.graphrbac import GraphRbacManagementClient
from azure.graphrbac.models import CheckGroupMembershipParameters
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.storage import StorageManagementClient
from azure.mgmt.subscription import SubscriptionClient
from azure.storage.blob import BlockBlobService
from memoization import cached
from msrestazure.azure_active_directory import MSIAuthentication
from msrestazure.tools import parse_resource_id
from onefuzztypes.primitives import Container
from .monkeypatch import allow_more_workers, reduce_logging
@ -35,34 +33,14 @@ def mgmt_client_factory(client_class: Any) -> Any:
try:
return get_client_from_cli_profile(client_class)
except CLIError:
if issubclass(client_class, SubscriptionClient):
return client_class(get_msi())
else:
return client_class(get_msi(), get_subscription())
pass
except OSError:
pass
@cached
def get_storage_account_name_key(account_id: Optional[str] = None) -> Tuple[str, str]:
db_client = mgmt_client_factory(StorageManagementClient)
if account_id is None:
account_id = os.environ["ONEFUZZ_DATA_STORAGE"]
resource = parse_resource_id(account_id)
key = (
db_client.storage_accounts.list_keys(
resource["resource_group"], resource["name"]
)
.keys[0]
.value
)
return resource["name"], key
@cached
def get_blob_service(account_id: Optional[str] = None) -> BlockBlobService:
logging.debug("getting blob container (account_id: %s)", account_id)
name, key = get_storage_account_name_key(account_id)
service = BlockBlobService(account_name=name, account_key=key)
return service
if issubclass(client_class, SubscriptionClient):
return client_class(get_msi())
else:
return client_class(get_msi(), get_subscription())
@cached
@ -92,16 +70,6 @@ def get_insights_appid() -> str:
return os.environ["APPINSIGHTS_APPID"]
# @cached
def get_fuzz_storage() -> str:
return os.environ["ONEFUZZ_DATA_STORAGE"]
# @cached
def get_func_storage() -> str:
return os.environ["ONEFUZZ_FUNC_STORAGE"]
@cached
def get_instance_name() -> str:
return os.environ["ONEFUZZ_INSTANCE_NAME"]
@ -114,9 +82,10 @@ def get_instance_url() -> str:
@cached
def get_instance_id() -> UUID:
from .containers import StorageType, get_blob
from .containers import get_blob
from .storage import StorageType
blob = get_blob("base-config", "instance_id", StorageType.config)
blob = get_blob(Container("base-config"), "instance_id", StorageType.config)
if blob is None:
raise Exception("missing instance_id")
return UUID(blob.decode())