associate subnets with NSG (#1393)

* associate subnets with NSG

change NSG rule protocol to ANY

* subnet wait

* Improve NSG update logic

validate that subnet nsg is set before getting it's id (#1409)

Co-authored-by: stas <statis@microsoft.com>
This commit is contained in:
Stas
2021-10-25 14:40:20 -07:00
parent cbe6ef8e40
commit 93d2d8d1b7
7 changed files with 236 additions and 29 deletions

View File

@ -3,16 +3,17 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import logging
import azure.functions as func
from onefuzztypes.enums import ErrorCode
from onefuzztypes.models import Error
from onefuzztypes.requests import InstanceConfigUpdate
from ..onefuzzlib.azure.nsg import set_allowed
from ..onefuzzlib.azure.nsg import is_one_fuzz_nsg, list_nsgs, set_allowed
from ..onefuzzlib.config import InstanceConfig
from ..onefuzzlib.endpoint_authorization import call_if_user, can_modify_config
from ..onefuzzlib.request import not_ok, ok, parse_request
from ..onefuzzlib.workers.scalesets import Scaleset
def get(req: func.HttpRequest) -> func.HttpResponse:
@ -46,15 +47,21 @@ def post(req: func.HttpRequest) -> func.HttpResponse:
# Update All NSGs
if update_nsg:
scalesets = Scaleset.search()
regions = set(x.region for x in scalesets)
for region in regions:
result = set_allowed(region, request.config.proxy_nsg_config)
nsgs = list_nsgs()
for nsg in nsgs:
logging.info(
"Checking if nsg: %s (%s) owned by OneFuzz" % (nsg.location, nsg.name)
)
if is_one_fuzz_nsg(nsg.location, nsg.name):
result = set_allowed(nsg.location, request.config.proxy_nsg_config)
if isinstance(result, Error):
return not_ok(
Error(
code=ErrorCode.UNABLE_TO_CREATE,
errors=["Unable to update nsg %s due to %s" % (region, result)],
errors=[
"Unable to update nsg %s due to %s"
% (nsg.location, result)
],
),
context="instance_config_update",
)

View File

@ -5,10 +5,11 @@
import logging
import os
from typing import Any, Dict, Optional, Union
from typing import Any, Dict, Optional, Union, cast
from uuid import UUID
from azure.core.exceptions import ResourceNotFoundError
from azure.mgmt.network.models import Subnet
from msrestazure.azure_exceptions import CloudError
from msrestazure.tools import parse_resource_id
from onefuzztypes.enums import ErrorCode
@ -18,6 +19,7 @@ from onefuzztypes.primitives import Region
from .creds import get_base_resource_group
from .network import Network
from .network_mgmt_client import get_network_client
from .nsg import NSG
from .vmss import get_instance_id
@ -95,7 +97,7 @@ def delete_nic(resource_group: str, name: str) -> Optional[Any]:
def create_public_nic(
resource_group: str, name: str, region: Region
resource_group: str, name: str, region: Region, nsg: Optional[NSG]
) -> Optional[Error]:
logging.info("creating nic for %s:%s in %s", resource_group, name, region)
@ -105,6 +107,14 @@ def create_public_nic(
network.create()
return None
if nsg:
subnet = cast(Subnet, network.get_subnet())
if not subnet.network_security_group:
result = nsg.associate_subnet(network.get_vnet(), subnet)
if isinstance(result, Error):
return result
return None
ip = get_ip(resource_group, name)
if not ip:
create_ip(resource_group, name, region)

View File

@ -7,6 +7,7 @@ import logging
import uuid
from typing import Optional, Union
from azure.mgmt.network.models import Subnet, VirtualNetwork
from msrestazure.azure_exceptions import CloudError
from onefuzztypes.enums import ErrorCode
from onefuzztypes.models import Error, NetworkConfig
@ -14,7 +15,7 @@ from onefuzztypes.primitives import Region
from ..config import InstanceConfig
from .creds import get_base_resource_group
from .subnet import create_virtual_network, get_subnet_id
from .subnet import create_virtual_network, get_subnet, get_subnet_id, get_vnet
# This was generated randomly and should be preserved moving forwards
NETWORK_GUID_NAMESPACE = uuid.UUID("372977ad-b533-416a-b1b4-f770898e0b11")
@ -51,6 +52,12 @@ class Network:
def get_id(self) -> Optional[str]:
return get_subnet_id(self.group, self.name, self.name)
def get_subnet(self) -> Optional[Subnet]:
return get_subnet(self.group, self.name, self.name)
def get_vnet(self) -> Optional[VirtualNetwork]:
return get_vnet(self.group, self.name)
def create(self) -> Union[None, Error]:
if not self.exists():
result = create_virtual_network(

View File

@ -13,7 +13,8 @@ from azure.mgmt.network.models import (
NetworkSecurityGroup,
SecurityRule,
SecurityRuleAccess,
SecurityRuleProtocol,
Subnet,
VirtualNetwork,
)
from msrestazure.azure_exceptions import CloudError
from onefuzztypes.enums import ErrorCode
@ -105,6 +106,10 @@ def ok_to_delete(active_regions: Set[Region], nsg_region: str, nsg_name: str) ->
return nsg_region not in active_regions and nsg_region == nsg_name
def is_one_fuzz_nsg(nsg_region: str, nsg_name: str) -> bool:
return nsg_region == nsg_name
def delete_nsg(name: str) -> bool:
# NSG can be only deleted if no other resource is associated with it
resource_group = get_base_resource_group()
@ -163,7 +168,7 @@ def set_allowed(name: str, sources: NetworkSecurityGroupConfig) -> Union[None, E
security_rules.append(
SecurityRule(
name="Allow" + str(priority),
protocol=SecurityRuleProtocol.TCP,
protocol="*",
source_port_range="*",
destination_port_range="*",
source_address_prefix=src,
@ -284,7 +289,7 @@ def dissociate_nic(name: str, nic: NetworkInterface) -> Union[None, Error]:
except (ResourceNotFoundError, CloudError) as err:
if is_concurrent_request_error(str(err)):
logging.debug(
"associate NSG with NIC had conflicts with ",
"dissociate nsg with nic had conflicts with ",
"concurrent request, ignoring %s",
err,
)
@ -292,7 +297,7 @@ def dissociate_nic(name: str, nic: NetworkInterface) -> Union[None, Error]:
return Error(
code=ErrorCode.UNABLE_TO_UPDATE,
errors=[
"Unable to associate nsg %s with nic %s due to %s"
"Unable to dissociate nsg %s with nic %s due to %s"
% (
name,
nic.name,
@ -304,6 +309,124 @@ def dissociate_nic(name: str, nic: NetworkInterface) -> Union[None, Error]:
return None
def associate_subnet(
name: str, vnet: VirtualNetwork, subnet: Subnet
) -> Union[None, Error]:
resource_group = get_base_resource_group()
nsg = get_nsg(name)
if not nsg:
return Error(
code=ErrorCode.UNABLE_TO_FIND,
errors=["cannot associate subnet. nsg %s not found" % name],
)
if nsg.location != vnet.location:
return Error(
code=ErrorCode.UNABLE_TO_UPDATE,
errors=[
"subnet and nsg have to be in the same region.",
"nsg %s %s, subnet: %s %s"
% (nsg.name, nsg.location, subnet.name, subnet.location),
],
)
# this is noop, since correct NSG is already assigned
if subnet.network_security_group and subnet.network_security_group.id == nsg.id:
return None
logging.info(
"associating subnet %s with nsg: %s %s", subnet.name, resource_group, name
)
subnet.network_security_group = nsg
network_client = get_network_client()
try:
network_client.subnets.begin_create_or_update(
resource_group, vnet.name, subnet.name, subnet
)
except (ResourceNotFoundError, CloudError) as err:
if is_concurrent_request_error(str(err)):
logging.debug(
"associate NSG with subnet had conflicts",
"with concurrent request, ignoring %s",
err,
)
return None
return Error(
code=ErrorCode.UNABLE_TO_UPDATE,
errors=[
"Unable to associate nsg %s with subnet %s due to %s"
% (
name,
subnet.name,
err,
)
],
)
return None
def dissociate_subnet(
name: str, vnet: VirtualNetwork, subnet: Subnet
) -> Union[None, Error]:
if subnet.network_security_group is None:
return None
resource_group = get_base_resource_group()
nsg = get_nsg(name)
if not nsg:
return Error(
code=ErrorCode.UNABLE_TO_FIND,
errors=["cannot update nsg rules. nsg %s not found" % name],
)
if nsg.id != subnet.network_security_group.id:
return Error(
code=ErrorCode.UNABLE_TO_UPDATE,
errors=[
"subnet is not associated with this nsg.",
"nsg %s, subnet: %s, subnet.nsg: %s"
% (
nsg.id,
subnet.name,
subnet.network_security_group.id,
),
],
)
logging.info(
"dissociating subnet %s with nsg: %s %s", subnet.name, resource_group, name
)
subnet.network_security_group = None
network_client = get_network_client()
try:
network_client.subnets.begin_create_or_update(
resource_group, vnet.name, subnet.name, subnet
)
except (ResourceNotFoundError, CloudError) as err:
if is_concurrent_request_error(str(err)):
logging.debug(
"dissociate nsg with subnet had conflicts with ",
"concurrent request, ignoring %s",
err,
)
return None
return Error(
code=ErrorCode.UNABLE_TO_UPDATE,
errors=[
"Unable to dissociate nsg %s with subnet %s due to %s"
% (
name,
subnet.name,
err,
)
],
)
return None
class NSG(BaseModel):
name: str
region: Region
@ -345,3 +468,13 @@ class NSG(BaseModel):
def dissociate_nic(self, nic: NetworkInterface) -> Union[None, Error]:
return dissociate_nic(self.name, nic)
def associate_subnet(
self, vnet: VirtualNetwork, subnet: Subnet
) -> Union[None, Error]:
return associate_subnet(self.name, vnet, subnet)
def dissociate_subnet(
self, vnet: VirtualNetwork, subnet: Subnet
) -> Union[None, Error]:
return dissociate_subnet(self.name, vnet, subnet)

View File

@ -8,6 +8,7 @@ import os
from typing import Any, Optional, Union, cast
from azure.core.exceptions import ResourceNotFoundError
from azure.mgmt.network.models import Subnet, VirtualNetwork
from msrestazure.azure_exceptions import CloudError
from onefuzztypes.enums import ErrorCode
from onefuzztypes.models import Error, NetworkConfig
@ -16,21 +17,41 @@ from onefuzztypes.primitives import Region
from .network_mgmt_client import get_network_client
def get_subnet_id(resource_group: str, name: str, subnet_name: str) -> Optional[str]:
def get_vnet(resource_group: str, name: str) -> Optional[VirtualNetwork]:
network_client = get_network_client()
try:
subnet = network_client.subnets.get(resource_group, name, subnet_name)
return cast(str, subnet.id)
vnet = network_client.virtual_networks.get(resource_group, name)
return cast(VirtualNetwork, vnet)
except (CloudError, ResourceNotFoundError):
logging.info(
"subnet missing: resource group:%s name:%s subnet_name:%s",
"vnet missing: resource group:%s name:%s",
resource_group,
name,
subnet_name,
)
return None
def get_subnet(
resource_group: str, vnet_name: str, subnet_name: str
) -> Optional[Subnet]:
# Has to get using vnet. That way NSG field is properly set up in subnet
vnet = get_vnet(resource_group, vnet_name)
if vnet:
for subnet in vnet.subnets:
if subnet.name == subnet_name:
return subnet
return None
def get_subnet_id(resource_group: str, name: str, subnet_name: str) -> Optional[str]:
subnet = get_subnet(resource_group, name, subnet_name)
if subnet:
return cast(str, subnet.id)
else:
return None
def delete_subnet(resource_group: str, name: str) -> Union[None, CloudError, Any]:
network_client = get_network_client()
try:

View File

@ -57,11 +57,15 @@ def create_vm(
nic = get_public_nic(resource_group, name)
if nic is None:
result = create_public_nic(resource_group, name, location)
result = create_public_nic(resource_group, name, location, nsg)
if isinstance(result, Error):
return result
logging.info("waiting on nic creation")
return None
# when public nic is created, VNET must exist at that point
# this is logic of get_public_nic function
if nsg:
result = nsg.associate_nic(nic)
if isinstance(result, Error):

View File

@ -7,8 +7,16 @@ import logging
import azure.functions as func
from onefuzztypes.enums import VmState
from onefuzztypes.models import Error
from ..onefuzzlib.azure.nsg import delete_nsg, list_nsgs, ok_to_delete
from ..onefuzzlib.azure.network import Network
from ..onefuzzlib.azure.nsg import (
associate_subnet,
delete_nsg,
get_nsg,
list_nsgs,
ok_to_delete,
)
from ..onefuzzlib.orm import process_state_updates
from ..onefuzzlib.proxy import PROXY_LOG_PREFIX, Proxy
from ..onefuzzlib.workers.scalesets import Scaleset
@ -52,9 +60,26 @@ def main(mytimer: func.TimerRequest) -> None: # noqa: F841
if all(x.outdated for x in proxies if x.region == region):
Proxy.get_or_create(region)
logging.info("Creating new proxy in region %s" % region)
# this is required in order to support upgrade from non-nsg to
# nsg enabled OneFuzz this will overwrite existing NSG
# assignment though. This behavior is acceptable at this point
# since we do not support bring your own NSG
if get_nsg(region):
network = Network(region)
subnet = network.get_subnet()
if subnet:
result = associate_subnet(region, network.get_vnet(), subnet)
if isinstance(result, Error):
logging.error(
"Failed to associate NSG and subnet due to %s in region %s"
% (result, region)
)
# if there are NSGs with name same as the region that they are allocated
# and have no NIC associated with it then delete the NSG
for nsg in list_nsgs():
if ok_to_delete(regions, nsg.location, nsg.name):
if nsg.network_interfaces is None:
if nsg.network_interfaces is None and nsg.subnets is None:
delete_nsg(nsg.name)