Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions modules/terraform/azure/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,17 @@ locals {
aks_aad_enabled = lookup(var.json_input, "aks_aad_enabled", false)
enable_apiserver_vnet_integration = lookup(var.json_input, "enable_apiserver_vnet_integration", false)

tags = {
"owner" = var.owner
"scenario" = "${var.scenario_type}-${var.scenario_name}"
"creation_time" = timestamp()
"deletion_due_time" = timeadd(timestamp(), var.deletion_delay)
"run_id" = local.run_id
"SkipAKSCluster" = "1"
}
# Tags stamped on every resource. Caller-supplied var.tags come first and the
# built-in map second, so on a key collision the built-in value wins
# (merge() gives precedence to later arguments).
tags = merge(
  var.tags,
  {
    owner             = var.owner
    scenario          = "${var.scenario_type}-${var.scenario_name}"
    run_id            = local.run_id
    SkipAKSCluster    = "1"
    creation_time     = timestamp()
    # Due time is creation time plus the configured deletion delay.
    deletion_due_time = timeadd(timestamp(), var.deletion_delay)
  },
)

network_config_map = { for network in var.network_config_list : network.role => network }

Expand Down
6 changes: 6 additions & 0 deletions modules/terraform/azure/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ variable "deletion_delay" {
default = "2h"
}

# Extra, caller-defined tags. Defaults to an empty map, so supplying this
# variable is optional.
variable "tags" {
  type        = map(string)
  default     = {}
  description = "Optional tags to apply to all resources"
}

variable "public_ip_config_list" {
description = "A list of public IP names"
type = list(object({
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Pipeline for the perf-eval / stls-bootstrap-nap scenario.
# No CI trigger: runs come only from the schedule below or from a manual queue.
trigger: none
schedules:
  # Azure Large Scale Schedule
  # Cron expressions in Azure Pipelines are evaluated in UTC.
  - cron: "0 3 * * *"
    displayName: "3:00 AM Daily"
    branches:
      include:
        - main
    # Run on schedule even when there are no new commits on the branch.
    always: true

variables:
  SCENARIO_TYPE: perf-eval
  SCENARIO_NAME: stls-bootstrap-nap

stages:
  - stage: azure_westus2_large
    # The display name compared here must match the schedule's displayName
    # above exactly; otherwise scheduled runs skip this stage and only manual
    # runs execute it.
    condition: |
      or(
        eq(variables['Build.CronSchedule.DisplayName'], '3:00 AM Daily'),
        eq(variables['Build.Reason'], 'Manual')
      )
    dependsOn: []
    jobs:
      - template: /jobs/competitive-test.yml
        parameters:
          cloud: azure
          regions:
            - westus2
          terraform_input_file_mapping:
            - westus2: "scenarios/perf-eval/stls-bootstrap-nap/terraform-inputs/azure.tfvars"
          engine: clusterloader2
          engine_input:
            image: "ghcr.io/azure/clusterloader2:v20250423"
          topology: karpenter
          matrix:
            large-scale-on-demand:
              cpu_per_node: 2
              node_count: 1000
              pod_count: 1000
              scale_up_timeout: "60m"
              scale_down_timeout: "60m"
              node_label_selector: "karpenter.sh/nodepool = default"
              node_selector: "{karpenter.sh/nodepool: default}"
              loop_count: 1
              warmup_deployment: true
              warmup_deployment_template: warmup_deployment.yaml
              # NOTE(review): the scenario's NodePool manifest pins
              # karpenter.azure.com/sku-name to Standard_D2_v5, which differs
              # from this value - confirm which one the job template uses.
              vm_size: Standard_D2ds_v4
              capacity_type: on-demand
          max_parallel: 1
          timeout_in_minutes: 360
          credential_type: service_connection
          ssh_key_enabled: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# AKSNodeClass referenced by both the On-Demand and the Spot NodePool.
---
kind: AKSNodeClass
apiVersion: karpenter.azure.com/v1alpha2
metadata:
  name: default
  annotations:
    kubernetes.io/description: "General purpose AKSNodeClass for running Ubuntu2204 nodes"
spec:
  imageFamily: Ubuntu2204

# NodePool "default": on-demand capacity only.
---
kind: NodePool
apiVersion: karpenter.sh/v1
metadata:
  name: default
  annotations:
    kubernetes.io/description: "General purpose On-Demand NodePool"
spec:
  template:
    spec:
      # Nodes are never expired by age.
      expireAfter: Never
      nodeClassRef:
        group: karpenter.azure.com
        kind: AKSNodeClass
        name: default
      # Scheduling constraints: Linux, on-demand, a single pinned VM SKU.
      requirements:
        - key: kubernetes.io/os
          operator: In
          values:
            - "linux"
        - key: karpenter.sh/capacity-type
          operator: In
          values:
            - "on-demand"
        - key: karpenter.azure.com/sku-name
          operator: In
          values:
            - Standard_D2_v5
  # Consolidate only empty nodes, after 1s, with no cap on how many nodes
  # may be disrupted at once (budget of 100%).
  disruption:
    consolidationPolicy: WhenEmpty
    consolidateAfter: 1s
    budgets:
      - nodes: "100%"

# NodePool "spot": spot capacity only; otherwise mirrors the "default" pool.
---
kind: NodePool
apiVersion: karpenter.sh/v1
metadata:
  name: spot
  annotations:
    kubernetes.io/description: "Spot NodePool for burstable cost-efficient workloads"
spec:
  template:
    spec:
      # Nodes are never expired by age.
      expireAfter: Never
      nodeClassRef:
        group: karpenter.azure.com
        kind: AKSNodeClass
        name: default
      # Scheduling constraints: Linux, spot, a single pinned VM SKU.
      requirements:
        - key: kubernetes.io/os
          operator: In
          values:
            - "linux"
        - key: karpenter.sh/capacity-type
          operator: In
          values:
            - "spot"
        - key: karpenter.azure.com/sku-name
          operator: In
          values:
            - Standard_D2_v5
  # Consolidate only empty nodes, after 1s, with no cap on how many nodes
  # may be disrupted at once (budget of 100%).
  disruption:
    consolidationPolicy: WhenEmpty
    consolidateAfter: 1s
    budgets:
      - nodes: "100%"
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Terraform inputs for the perf-eval / stls-bootstrap-nap scenario.
scenario_type  = "perf-eval"
scenario_name  = "stls-bootstrap-nap"
owner          = "aks"
deletion_delay = "2h"

# Extra tags supplied through the module's `tags` variable.
tags = {
  enable-stls-nap = "true"
}

# Left empty; the only cluster for this scenario is declared in
# aks_cli_config_list below.
aks_config_list = []

aks_cli_config_list = [
  {
    role               = "nap"
    aks_name           = "nap"
    sku_tier           = "standard"
    kubernetes_version = "1.33"

    # NOTE(review): presumably opts the cluster into the secure TLS
    # bootstrapping feature via a custom HTTP header - confirm against the
    # module that consumes aks_custom_headers.
    aks_custom_headers = [
      "AKSHTTPCustomFeatures=Microsoft.ContainerService/EnableSecureTLSBootstrapping",
    ]

    default_node_pool = {
      name       = "system"
      node_count = 5
      vm_size    = "Standard_D4_v5"
    }
    extra_node_pool = []

    # Name/value pairs; the original list order is preserved.
    optional_parameters = [
      { name = "node-provisioning-mode", value = "Auto" },
      { name = "network-plugin", value = "azure" },
      { name = "network-plugin-mode", value = "overlay" },
      { name = "node-init-taints", value = "CriticalAddonsOnly=true:NoSchedule" },
      { name = "pod-cidr", value = "10.128.0.0/11" },
    ]
  },
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
  "run_id": "123456789",
  "region": "westus2"
}