Merge pull request #42 from ministryofjustice/cluster-critical-priority

Add extra overprovision for daemonset
ministryofjustice · Jun 3, 2024 · 7eb8452 · 7eb8452
2 parents a7bd360 + cbae543
commit 7eb8452
Show file tree

Hide file tree

Showing 4 changed files with 129 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -49,6 +49,7 @@ module "cluster_autoscaler" {
 | [aws_iam_policy.cluster_autoscaler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
 | [helm_release.cluster-overprovisioner](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [helm_release.cluster-proportional-autoscaler-cpu](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
+| [helm_release.cluster-proportional-autoscaler-daemonset](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [helm_release.cluster-proportional-autoscaler-memory](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [helm_release.cluster_autoscaler](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
 | [kubernetes_namespace.overprovision](https://registry.terraform.io/providers/hashicorp/kubernetes/latest/docs/resources/namespace) | resource |

diff --git a/overprovision.tf b/overprovision.tf
@@ -16,6 +16,13 @@ locals {
     live-2  = var.live_cpu_request
     default = "10m"
   }
+
+  daemonset_overprovision = { # CPU value for the "daemonset" overprovision pods, keyed by Terraform workspace
+    manager = "10m"
+    live    = var.live_cpu_request
+    live-2  = var.live_cpu_request
+    default = "10m" # used by the lookup() fallback when the workspace has no entry of its own
+  }
 }
 
 resource "kubernetes_namespace" "overprovision" {
@@ -52,6 +59,7 @@ resource "helm_release" "cluster-overprovisioner" {
   values = [templatefile("${path.module}/templates/cluster-overprovisioner.yaml.tpl", {
     memory_overprovision = lookup(local.memory_overprovision, terraform.workspace, local.memory_overprovision["default"])
     cpu_overprovision    = lookup(local.cpu_overprovision, terraform.workspace, local.cpu_overprovision["default"])
+    daemonset_overprovision    = lookup(local.daemonset_overprovision, terraform.workspace, local.daemonset_overprovision["default"])
   })]
 
 }
@@ -82,3 +90,15 @@ resource "helm_release" "cluster-proportional-autoscaler-cpu" {
 
 }
 
+resource "helm_release" "cluster-proportional-autoscaler-daemonset" { # cluster-proportional-autoscaler release configured by templates/cpa-daemonset.yaml.tpl
+  count      = var.enable_overprovision ? 1 : 0 # created only when overprovisioning is enabled
+  name       = "cluster-proportional-autoscaler-daemonset"
+  chart      = "cluster-proportional-autoscaler"
+  namespace  = kubernetes_namespace.overprovision[count.index].id
+  repository = "https://kubernetes-sigs.github.io/cluster-proportional-autoscaler"
+  version    = "1.1.0" # pinned chart version
+
+  values = [templatefile("${path.module}/templates/cpa-daemonset.yaml.tpl", {
+  })] # the variable map is empty: the template is rendered without any substitutions
+
+}
diff --git a/templates/cluster-overprovisioner.yaml.tpl b/templates/cluster-overprovisioner.yaml.tpl
@@ -117,6 +117,40 @@ deployments:
       #   topologyKey: kubernetes.io/hostname
       #   whenUnsatisfiable: ScheduleAnyway
 
+  # Daemonset Overprovisioner deployment - This is not a real DaemonSet: it mimics the other overprovisioners by adding pods, and uses topologySpreadConstraints to spread those pods across nodes (best effort, see whenUnsatisfiable below).
+  - name: daemonset
+    # deployments[daemonset].annotations -- Daemonset Deployment - Annotations to add to the deployment
+    annotations: {}
+    # deployments[daemonset].podAnnotations -- Daemonset Deployment - Annotations to add to the pods
+    podAnnotations: {}
+    # deployments[daemonset].replicaCount -- Daemonset Deployment - Number of replicas
+    replicaCount: 1
+    # deployments[daemonset].nodeSelector -- Daemonset Deployment - Node labels for pod assignment
+    nodeSelector: {}
+    resources:
+      limits:
+        # deployments[daemonset].resources.limits.cpu -- Daemonset Deployment - CPU limit for the overprovision pods (Terraform template variable)
+        cpu: ${ daemonset_overprovision }
+        # deployments[daemonset].resources.limits.memory -- Daemonset Deployment - Memory limit for the overprovision pods
+        memory: "10Mi"
+      requests:
+        # deployments[daemonset].resources.requests.cpu -- Daemonset Deployment - CPU requested for the overprovision pods (same template variable as the limit)
+        cpu: ${ daemonset_overprovision }
+        # deployments[daemonset].resources.requests.memory -- Daemonset Deployment - Memory requested for the overprovision pods
+        memory: "10Mi"
+    # deployments[daemonset].tolerations -- Daemonset Deployment - Optional deployment tolerations
+    tolerations: []
+    # deployments[daemonset].affinity -- Daemonset Deployment - Map of node/pod affinities
+    affinity: {}
+    # deployments[daemonset].labels -- Daemonset Deployment - Optional labels
+    labels: {}
+    # deployments[daemonset].topologySpreadConstraints -- Daemonset Deployment - Topology spread constraints (one topology domain per node hostname)
+    topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # soft constraint: pods are still scheduled when the skew cannot be met
+        nodeTaintsPolicy: Honor  # tainted nodes the pods do not tolerate are left out of the skew calculation
+
 serviceAccount:
   # serviceAccount.create -- Determine whether a Service Account should be created or it should reuse an existing one
   create: true

diff --git a/templates/cpa-daemonset.yaml.tpl b/templates/cpa-daemonset.yaml.tpl
@@ -0,0 +1,74 @@
+affinity: {}
+config:  # scaling parameters; the commented-out ladder block below is an unused alternative to linear
+#  ladder:
+#    coresToReplicas:
+#      - [ 1, 1 ]
+#      - [ 64, 3 ]
+#      - [ 512, 5 ]
+#      - [ 1024, 7 ]
+#      - [ 2048, 10 ]
+#      - [ 4096, 15 ]
+#    nodesToReplicas:
+#      - [ 1, 1 ]
+#      - [ 2, 2 ]
+  linear:
+    nodesPerReplica: 1  # one replica of the target per node
+    min: 1
+    max: 100  # upper bound on replicas: beyond 100 nodes the target stops growing
+    preventSinglePointFailure: true  # per the autoscaler docs: at least 2 replicas once there is more than one node
+    includeUnschedulableNodes: true  # per the autoscaler docs: unschedulable nodes also count towards the node total
+image:
+  repository: registry.k8s.io/cpa/cluster-proportional-autoscaler
+  pullPolicy: IfNotPresent
+  tag:  # empty value parses as null; presumably the chart then picks its own default tag - confirm
+imagePullSecrets: []
+fullnameOverride:  # empty value parses as null (no override)
+nameOverride: daemonset
+nodeSelector: {}
+options:
+  alsoLogToStdErr: "true"
+  logBacktraceAt: 0
+  logDir: ""
+  # --v=0: log level for V logs
+  logLevel: 0
+  # Defaulting to true limits use of ephemeral storage
+  logToStdErr: true
+  maxSyncFailures:  # empty value parses as null
+  namespace: overprovision  # hard-coded; NOTE(review): presumably must match the namespace of the target deployment - confirm
+  nodeLabels: {}
+  #   label1: value1
+  #   label2: value2
+  pollPeriodSeconds: 10
+  stdErrThreshold: 2
+  target: "deployment/cluster-overprovisioner-daemonset"  # the workload this autoscaler resizes
+  vmodule:  # empty value parses as null
+podAnnotations: {}
+podSecurityContext: {}
+  # fsGroup: 2000
+replicaCount: 1  # presumably replicas of the autoscaler pod itself, not of the target - confirm against the chart
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+securityContext: {}
+  # capabilities:
+  #   drop:
+  #   - ALL
+  # readOnlyRootFilesystem: true
+  # runAsNonRoot: true
+  # runAsUser: 1000
+serviceAccount:
+  create: true
+  annotations: {}
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template.
+  # If set and create is false, no service account will be created; the provided service account is expected to already exist, or the "default" service account will be used.
+  name:  # empty (null): with create set to true above, the generated name applies
+tolerations: []