[Question] Does AKS actually support cluster-autoscaler-priority-expander? #4761
Description
Describe scenario
I have been playing around with spot instances for node autoscaling. Based on some articles I've seen online, and previous GitHub issues, it seems that AKS should support a cluster-autoscaler-priority-expander ConfigMap to control behaviour when the priority expander is used. I've tried various things (some of which I left commented out in the YAML below), but it doesn't seem to take effect.
Question
Is this actually supported, and if so, what is the problem?
Supporting Info
Even though this is the opposite of what I'll ultimately want to do, it is a useful example because the behaviour does not work as expected. In this example, I want the "regular" pool to scale before the spot pool.
Some quick-and-dirty Terraform below to bootstrap the cluster:
terraform {
  required_providers {
    azurerm = {
      source = "hashicorp/azurerm"
      version = "~> 4.16.0"
    }
  }
}

provider "azurerm" {
  features {}
  subscription_id = "<sub_id>"
}

resource "azurerm_resource_group" "aks_rg" {
  name = "aks-spot-demo-rg"
  location = "uksouth"
}

resource "azurerm_kubernetes_cluster" "aks" {
  name = "aks-spot-demo"
  location = azurerm_resource_group.aks_rg.location
  resource_group_name = azurerm_resource_group.aks_rg.name
  dns_prefix = "aks-spot-demo"

  default_node_pool {
    name = "system"
    node_count = 1
    vm_size = "Standard_B4ms"
    type = "VirtualMachineScaleSets"
    auto_scaling_enabled = true
    min_count = 1
    max_count = 3
  }

  identity {
    type = "SystemAssigned"
  }

  auto_scaler_profile {
    expander = "priority"
  }

  network_profile {
    network_plugin = "azure"
    network_plugin_mode = "overlay"
    load_balancer_sku = "standard"
  }
}

resource "azurerm_kubernetes_cluster_node_pool" "spot" {
  name = "spot"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id
  vm_size = "Standard_D2_v2"
  priority = "Spot"
  eviction_policy = "Delete"
  spot_max_price = 0.05
  auto_scaling_enabled = true
  min_count = 0
  max_count = 5
  # node_taints = [
  #   "kubernetes.azure.com/scalesetpriority=spot:NoSchedule"
  # ]
}

resource "azurerm_kubernetes_cluster_node_pool" "regular" {
  name = "regular"
  kubernetes_cluster_id = azurerm_kubernetes_cluster.aks.id
  vm_size = "Standard_B2s"
  auto_scaling_enabled = true
  min_count = 0
  max_count = 3
}

resource "local_file" "kubeconfig" {
  content = azurerm_kubernetes_cluster.aks.kube_config_raw
  filename = "${path.module}/kubeconfig.yaml"
}

output "kubectl_command" {
  value = "alias k=\"kubectl --kubeconfig=kubeconfig.yaml\""
}
I then applied the priority expander ConfigMap:
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: cluster-autoscaler-priority-expander
  namespace: kube-system
data:
  priorities: |-
    100:
      - .*regular*
    50:
      - .*spot*
    10:
      - .*
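As far as I understand, the priority expander matches these regexes against the autoscaler's node group names, which on AKS should be the underlying scale set names, so patterns with an explicit trailing .* (e.g. .*regular.* and .*spot.*) may be safer than the bare trailing * above. To see the actual names the patterns have to match, the scale sets can be listed from the node resource group (assuming the default MC_<rg>_<cluster>_<location> naming):

# List the VMSS names backing the node pools, e.g. aks-regular-12345678-vmss
az vmss list --resource-group MC_aks-spot-demo-rg_aks-spot-demo_uksouth --query "[].name" --output table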
Then I deployed my test application:
apiVersion: v1
kind: Namespace
metadata:
  name: demo
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: demo-app
  namespace: demo
  labels:
    app: demo-app
spec:
  replicas: 3
  selector:
    matchLabels:
      app: demo-app
  template:
    metadata:
      labels:
        app: demo-app
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.azure.com/scalesetpriority
                    operator: In
                    values:
                      - spot
                      - regular
          # preferredDuringSchedulingIgnoredDuringExecution:
          #   - weight: 100
          #     preference:
          #       matchExpressions:
          #         - key: kubernetes.azure.com/scalesetpriority
          #           operator: In
          #           values:
          #             - spot
          #   - weight: 50
          #     preference:
          #       matchExpressions:
          #         - key: kubernetes.azure.com/scalesetpriority
          #           operator: In
          #           values:
          #             - regular
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: demo-app
              topologyKey: "kubernetes.io/hostname"
      tolerations:
        - key: "kubernetes.azure.com/scalesetpriority"
          operator: "Equal"
          value: "spot"
          effect: "NoSchedule"
      terminationGracePeriodSeconds: 25
      # securityContext:
      #   runAsNonRoot: true
      #   runAsUser: 1000
      #   fsGroup: 2000
      containers:
        - name: demo-app
          image: rancher/hello-world:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "500m"
              memory: "256Mi"
            requests:
              cpu: "250m"
              memory: "128Mi"
---
apiVersion: v1
kind: Service
metadata:
  name: demo-app
  namespace: demo
spec:
  type: LoadBalancer
  ports:
    - port: 80
      targetPort: 8080
  selector:
    app: demo-app
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: demo-app-pdb
  namespace: demo
spec:
  minAvailable: 1
  selector:
    matchLabels:
      app: demo-app
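To see which pool actually scales, the agentpool label on the nodes can be watched, along with the cluster-autoscaler-status ConfigMap in kube-system (assuming the managed autoscaler publishes it; it may not be present on AKS):

# Which pool each node belongs to
kubectl --kubeconfig=kubeconfig.yaml get nodes -L agentpool
# The autoscaler's own view of the node groups, if available
kubectl --kubeconfig=kubeconfig.yaml --namespace kube-system get configmap cluster-autoscaler-status --output yaml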
But instead of the regular pool scaling, the spot one did: