support multiple corpus accounts (#334)

Add support for sharding across multiple storage accounts for blob containers used for corpus management.

Things to note:

1. Additional storage accounts must be in the same resource group, support the "blob" endpoint, and have the tag `storage_type` with the value `corpus`.  A utility, `src/utils/add-corpus-storage-accounts`, is provided to add such storage accounts.
2. If any secondary storage accounts exist, they are used by default for containers.
3. Storage account names are cached forever in the memory of the Azure Function instance.  After adding new storage accounts, the app needs to be restarted to pick up the new accounts.
This commit is contained in:
bmc-msft
2021-01-06 18:11:39 -05:00
committed by GitHub
parent f345bd239d
commit 3b26ffef65
29 changed files with 496 additions and 179 deletions

View File

@ -19,8 +19,7 @@ from azure.storage.queue import (
from memoization import cached
from pydantic import BaseModel
from .containers import StorageType, get_account_id_by_type
from .creds import get_storage_account_name_key
from .storage import StorageType, get_primary_account, get_storage_account_name_key
QueueNameType = Union[str, UUID]
@ -29,7 +28,7 @@ DEFAULT_TTL = -1
@cached(ttl=60)
def get_queue_client(storage_type: StorageType) -> QueueServiceClient:
account_id = get_account_id_by_type(storage_type)
account_id = get_primary_account(storage_type)
logging.debug("getting blob container (account_id: %s)", account_id)
name, key = get_storage_account_name_key(account_id)
account_url = "https://%s.queue.core.windows.net" % name
@ -50,7 +49,7 @@ def get_queue_sas(
update: bool = False,
process: bool = False,
) -> str:
account_id = get_account_id_by_type(storage_type)
account_id = get_primary_account(storage_type)
logging.debug("getting queue sas %s (account_id: %s)", queue, account_id)
name, key = get_storage_account_name_key(account_id)
expiry = datetime.datetime.utcnow() + datetime.timedelta(days=30)