Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AKS: Get VM SKU details from backend for validation for Azure Container Storage #8178

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions src/aks-preview/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ If there is no rush to release a new version, please just add a description of t

To release a new version, please select a new version number (usually plus 1 to last patch version, X.Y.Z -> Major.Minor.Patch, more details in `\doc <https://semver.org/>`_), and then add a new section named as the new version number in this file, the content should include the new modifications and everything from the *Pending* section. Finally, update the `VERSION` variable in `setup.py` with this new version number.

9.0.0b8
+++++++
* Update VM SKU validations to get values from backend API for Azure Container Storage.

9.0.0b7
+++++++
* Fix bug related to updating the monitoring addon DCR when a non-monitoring addon is enabled through `az aks enable-addons`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
delete_role_assignments,
)
from azure.cli.core.azclierror import UnknownError
from cachetools import cached
from cachetools.keys import hashkey
from knack.log import get_logger

logger = get_logger(__name__)
Expand Down Expand Up @@ -410,38 +412,6 @@ def get_desired_resource_value_args(
)


# get_cores_from_sku returns the number of cores in the vm_size passed.
# Returns -1 if there is a problem with parsing the vm_size.
def get_cores_from_sku(vm_size):
    """Derive the core count from a VM size name such as ``Standard_D4s_v3``.

    The numeric part of the size name is used as the core count, except for
    the Dv2/DSv2 families, whose size numbers do not equal their core counts
    and are translated through a lookup table.

    :param vm_size: VM size name (case-insensitive).
    :return: number of cores, or -1 when the name cannot be parsed.
    """
    # The whole series letter run must be captured as one group. A repeated
    # single-character group, ([a-z])+, keeps only the last letter, which
    # made the 'ds' comparison below impossible to satisfy.
    pattern = r'([a-z]+)(\d+)[a-z]*(?=_v(\d+)[^_]*$|$)'
    match = re.search(pattern, vm_size.lower())
    if match is None:
        return -1

    series_prefix = match.group(1)
    size_val = int(match.group(2))
    version_val = match.group(3)
    version = int(version_val) if version_val is not None else -1

    # https://learn.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series
    # https://learn.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series-memory
    if version == 2 and series_prefix in ('d', 'ds'):
        # Size number -> core count for the v2 D/DS families; any size not
        # listed here falls back to the size number itself.
        dv2_cores = {
            2: 2, 11: 2,
            3: 4, 12: 4,
            4: 8, 13: 8,
            5: 16, 14: 16,
            15: 20,
        }
        return dv2_cores.get(size_val, size_val)

    return size_val


def check_if_new_storagepool_creation_required(
storage_pool_type,
ephemeral_disk_volume_type,
Expand All @@ -467,6 +437,34 @@ def check_if_new_storagepool_creation_required(
return should_create_storagepool


def generate_vm_sku_cache_for_region(cli_ctx, location=None):
    """Prime the ``vm_sku_details`` cache from the SKU listing.

    For every SKU returned by ``_get_vm_sku_details`` the capability list is
    scanned for the core count and for the presence of an NVMe disk size
    entry; the result is stored by calling ``vm_sku_details`` with the
    lower-cased SKU name.

    :param cli_ctx: CLI context forwarded to ``_get_vm_sku_details``.
    :param location: optional location forwarded to ``_get_vm_sku_details``.
    """
    for sku in _get_vm_sku_details(cli_ctx, location):
        name_key = sku.name.lower()
        cores = -1
        has_nvme = False
        for capability in sku.capabilities:
            cap_name = capability.name
            if cap_name == 'vCPUs' and cores == -1:
                # Only used while no value has been recorded yet.
                cores = int(capability.value)
            elif cap_name == 'vCPUsAvailable':
                # Always wins over a previously recorded value.
                cores = int(capability.value)
            elif cap_name == 'NvmeDiskSizeInMiB':
                # Presence of the capability alone marks the SKU as NVMe.
                has_nvme = True

        vm_sku_details(name_key, cores, has_nvme)


@cached(
    cache={},
    key=lambda sku_name, cpu_value=None, nvme_enabled=None: hashkey(sku_name)
)
def vm_sku_details(sku_name, cpu_value=None, nvme_enabled=None):
    """Store or look up the ``(cpu_value, nvme_enabled)`` pair for a SKU.

    The cache key is built from ``sku_name`` only, so the first call for a
    given name fixes the value returned by every later call for that name,
    whatever other arguments are passed.

    When the first call for a name supplies no details, the pair is
    normalised to ``(-1, False)`` instead of ``(None, None)``. Callers
    compare the core count against ``-1`` / ``< 0`` and multiply it, which
    would raise ``TypeError`` on ``None``.

    NOTE(review): a lookup made before the cache is populated also stores
    the ``(-1, False)`` sentinel for that name; confirm callers always
    populate the cache first.

    :param sku_name: SKU name used as the cache key.
    :param cpu_value: core count to store on first call; ``None`` means unknown.
    :param nvme_enabled: NVMe flag to store on first call; ``None`` means unknown.
    :return: tuple ``(cpu_value, nvme_enabled)``.
    """
    if cpu_value is None:
        cpu_value = -1
    return cpu_value, bool(nvme_enabled)


def _get_ephemeral_nvme_cpu_value_based_on_vm_size_perf_tier(nodepool_skus, perf_tier):
cpu_value = -1
multiplication_factor = 0.25
Expand All @@ -475,7 +473,7 @@ def _get_ephemeral_nvme_cpu_value_based_on_vm_size_perf_tier(nodepool_skus, perf
elif perf_tier.lower() == CONST_EPHEMERAL_NVME_PERF_TIER_PREMIUM.lower():
multiplication_factor = 0.5
for vm_size in nodepool_skus:
number_of_cores = get_cores_from_sku(vm_size)
number_of_cores, _ = vm_sku_details(vm_size.lower())
if number_of_cores != -1:
if cpu_value == -1:
cpu_value = number_of_cores * multiplication_factor
Expand Down Expand Up @@ -554,3 +552,18 @@ def _generate_k8s_extension_resource_args(
]

return resource_args


def _get_vm_sku_details(cli_ctx, location=None):
    """List the virtual machine resource SKUs, optionally for one location.

    :param cli_ctx: CLI context passed to ``get_compute_client``.
    :param location: when truthy, keep only SKUs whose ``locations`` contain
        this value (compared case-insensitively).
    :return: list of SKU objects whose resource type is ``virtualmachines``.
    """
    from azext_aks_preview._client_factory import get_compute_client

    vm_skus = [
        sku for sku in get_compute_client(cli_ctx).resource_skus.list()
        if sku.resource_type.lower() == 'virtualmachines'
    ]
    if not location:
        return vm_skus

    return [
        sku for sku in vm_skus
        if any(location.lower() == candidate.lower() for candidate in sku.locations)
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
CONST_STORAGE_POOL_TYPE_EPHEMERAL_DISK,
)
from azext_aks_preview.azurecontainerstorage._helpers import (
get_cores_from_sku
vm_sku_details
)
from azure.cli.core.azclierror import (
ArgumentUsageError,
Expand Down Expand Up @@ -452,11 +452,10 @@ def _validate_nodepools( # pylint: disable=too-many-branches,too-many-locals
)

if agentpool_details is not None:
for agentpool in agentpool_details:
for nodepool_name, agentpool in agentpool_details.items():
node_labels = agentpool.get("node_labels")
if node_labels is not None and \
node_labels.get(CONST_ACSTOR_IO_ENGINE_LABEL_KEY) is not None:
nodepool_name = agentpool.get("name")
nodepool_arr.append(nodepool_name)

if len(nodepool_arr) == 0:
Expand All @@ -473,64 +472,78 @@ def _validate_nodepools( # pylint: disable=too-many-branches,too-many-locals
'from the node pool and use node pools which has nodes with 4 or more cores and try again.'
)
else:
agentpool_names = []
if agentpool_details is not None:
for details in agentpool_details:
agentpool_names.append(details.get("name"))
if not nodepool_list:
agentpool_names_str = ', '.join(agentpool_names)
agentpool_names_str = ', '.join(agentpool_details.keys())
raise RequiredArgumentMissingError(
'Multiple node pools present. Please define the node pools on which you want '
'to enable Azure Container Storage using --azure-container-storage-nodepools.'
f'\nNode pools available in the cluster are: {agentpool_names_str}.'
'\nAborting Azure Container Storage operation.'
)
_validate_nodepool_names(nodepool_list, agentpool_names)
pattern = r'^[a-z][a-z0-9]*(?:,[a-z][a-z0-9]*)*$'
if re.fullmatch(pattern, nodepool_list) is None:
raise InvalidArgumentValueError(
'Invalid --azure-container-storage-nodepools value. '
'Accepted value is a comma separated string of valid node pool '
'names without any spaces.\nA valid node pool name may only contain lowercase '
'alphanumeric characters and must begin with a lowercase letter.'
)
nodepool_arr = nodepool_list.split(',')

nvme_nodepool_found = False
available_node_count = 0
multi_zoned_cluster = False

for nodepool in nodepool_arr:
for agentpool in agentpool_details:
pool_name = agentpool.get("name")
if nodepool == pool_name:
os_type = agentpool.get("os_type")
if os_type is not None and os_type.lower() != CONST_DEFAULT_NODE_OS_TYPE.lower():
agentpool = agentpool_details.get(nodepool)
if agentpool is not None:
os_type = agentpool.get("os_type")
if os_type is not None and os_type.lower() != CONST_DEFAULT_NODE_OS_TYPE.lower():
raise InvalidArgumentValueError(
f'Azure Container Storage can be enabled only on {CONST_DEFAULT_NODE_OS_TYPE} nodepools. '
f'Node pool: {pool_name}, os type: {os_type} does not meet the criteria.'
)
mode = agentpool.get("mode")
node_taints = agentpool.get("node_taints")
if mode is not None and mode.lower() == "system" and node_taints is not None:
critical_taint = "CriticalAddonsOnly=true:NoSchedule"
if critical_taint.casefold() in (taint.casefold() for taint in node_taints):
raise InvalidArgumentValueError(
f'Azure Container Storage can be enabled only on {CONST_DEFAULT_NODE_OS_TYPE} nodepools. '
f'Node pool: {pool_name}, os type: {os_type} does not meet the criteria.'
f'Unable to install Azure Container Storage on system nodepool: {pool_name} '
f'since it has a taint {critical_taint}. Remove the taint from the node pool '
'and retry the Azure Container Storage operation.'
)
mode = agentpool.get("mode")
node_taints = agentpool.get("node_taints")
if mode is not None and mode.lower() == "system" and node_taints is not None:
critical_taint = "CriticalAddonsOnly=true:NoSchedule"
if critical_taint.casefold() in (taint.casefold() for taint in node_taints):
raise InvalidArgumentValueError(
f'Unable to install Azure Container Storage on system nodepool: {pool_name} '
f'since it has a taint {critical_taint}. Remove the taint from the node pool '
'and retry the Azure Container Storage operation.'
)
vm_size = agentpool.get("vm_size")
if vm_size is not None:
cpu_value = get_cores_from_sku(vm_size)
if cpu_value < 0:
raise UnknownError(
f'Unable to determine number of cores in node pool: {pool_name}, node size: {vm_size}'
)
if cpu_value < 4:
raise InvalidArgumentValueError(insufficient_core_error.format(pool_name, vm_size, cpu_value))

if vm_size.lower().startswith('standard_l'):
nvme_nodepool_found = True

node_count = agentpool.get("count")
if node_count is not None:
available_node_count = available_node_count + node_count

zoned_nodepool = agentpool.get("zoned")
if zoned_nodepool:
multi_zoned_cluster = True
vm_size = agentpool.get("vm_size")
if vm_size is not None:
cpu_value, nvme_enabled = vm_sku_details(vm_size.lower())
if cpu_value < 0:
raise UnknownError(
f'Unable to determine number of cores in node pool: {pool_name}, node size: {vm_size}'
)
if cpu_value < 4:
raise InvalidArgumentValueError(insufficient_core_error.format(pool_name, vm_size, cpu_value))

nvme_nodepool_found = nvme_nodepool_found or nvme_enabled

node_count = agentpool.get("count")
if node_count is not None:
available_node_count = available_node_count + node_count

zoned_nodepool = agentpool.get("zoned")
if zoned_nodepool:
multi_zoned_cluster = True
else:
agentpool_names_str = ",".join(agentpool_details.keys())
nodepool_str = "Node pool"
if len(agentpool_details.keys()):
nodepool_str = "Node pools"
raise InvalidArgumentValueError(
f'Node pool: {nodepool} not found. '
'Please provide a comma separated string of existing node pool names '
'in --azure-container-storage-nodepools.'
f'\n{nodepool_str} available in the cluster: {agentpool_names_str}.'
'\nAborting installation of Azure Container Storage.'
)

if available_node_count < 3:
raise UnknownError(
Expand All @@ -553,37 +566,3 @@ def _validate_nodepools( # pylint: disable=too-many-branches,too-many-locals
'as none of the node pools are zoned. Please add a zoned node pool and '
'try again.'
)


# _validate_nodepool_names validates that the nodepool_list is a comma separated
# string consisting of valid nodepool names i.e. a lower alphanumeric
# characters and the first character should be lowercase letter.
def _validate_nodepool_names(nodepool_names, agentpool_names):
pattern = r'^[a-z][a-z0-9]*(?:,[a-z][a-z0-9]*)*$'
if re.fullmatch(pattern, nodepool_names) is None:
raise InvalidArgumentValueError(
"Invalid --azure-container-storage-nodepools value. "
"Accepted value is a comma separated string of valid node pool "
"names without any spaces.\nA valid node pool name may only contain lowercase "
"alphanumeric characters and must begin with a lowercase letter."
)

nodepool_list = nodepool_names.split(',')
for nodepool in nodepool_list:
if nodepool not in agentpool_names:
if len(agentpool_names) > 1:
agentpool_names_str = ', '.join(agentpool_names)
raise InvalidArgumentValueError(
f'Node pool: {nodepool} not found. '
'Please provide a comma separated string of existing node pool names '
'in --azure-container-storage-nodepools.'
f'\nNode pools available in the cluster are: {agentpool_names_str}.'
'\nAborting installation of Azure Container Storage.'
)
raise InvalidArgumentValueError(
f'Node pool: {nodepool} not found. '
'Please provide a comma separated string of existing node pool names '
'in --azure-container-storage-nodepools.'
f'\nNode pool available in the cluster is: {agentpool_names[0]}.'
'\nAborting installation of Azure Container Storage.'
)
Loading
Loading