feat(supervisor): project-based scheduling affinity for image cache locality

Adds optional pod affinity so that pods from the same project prefer to be scheduled on the same node. This can help improve image cache hit rates: subsequent pods benefit from already-pulled image layers, which reduces startup time.

Complements the built-in ImageLocality scheduler plugin by helping during burst scheduling scenarios. Pod affinity sees scheduled pods immediately, while ImageLocality only sees images after they are fully pulled.

Configuration:
- `KUBERNETES_PROJECT_AFFINITY_ENABLED` - Enable/disable (default: false)
- `KUBERNETES_PROJECT_AFFINITY_WEIGHT` - Scheduler weight 1-100 (default: 50)
- `KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY` - Topology key (default: kubernetes.io/hostname)

Uses soft (preferred) affinity so pods always schedule even if the preferred node is full.
triggerdotdev · myftija · Feb 4, 2026 · Feb 4, 2026 · Feb 4, 2026 · Feb 4, 2026
commit 306cbc8390d46121e9cdcf603058b1a759c9f736
diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts
@@ -112,6 +112,11 @@ const Env = z.object({
   KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods
   KUBERNETES_LARGE_MACHINE_POOL_LABEL: z.string().optional(), // if set, large-* presets affinity for machinepool=<value>
 
+  // Project affinity settings - pods from the same project prefer the same node
+  KUBERNETES_PROJECT_AFFINITY_ENABLED: BoolEnv.default(false),
+  KUBERNETES_PROJECT_AFFINITY_WEIGHT: z.coerce.number().int().min(1).max(100).default(50),
+  KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY: z.string().default("kubernetes.io/hostname"),
+
   // Placement tags settings
   PLACEMENT_TAGS_ENABLED: BoolEnv.default(false),
   PLACEMENT_TAGS_PREFIX: z.string().default("node.cluster.x-k8s.io"),

diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts
@@ -120,7 +120,7 @@ export class KubernetesWorkloadManager implements WorkloadManager {
           },
           spec: {
             ...this.addPlacementTags(this.#defaultPodSpec, opts.placementTags),
-            affinity: this.#getNodeAffinity(opts.machine),
+            affinity: this.#getAffinity(opts.machine, opts.projectId),
             terminationGracePeriodSeconds: 60 * 60,
             containers: [
               {
@@ -390,50 +390,86 @@ export class KubernetesWorkloadManager implements WorkloadManager {
     return preset.name.startsWith("large-");
   }
 
-  #getNodeAffinity(preset: MachinePreset): k8s.V1Affinity | undefined {
+  #getAffinity(preset: MachinePreset, projectId: string): k8s.V1Affinity | undefined {
+    const nodeAffinity = this.#getNodeAffinityRules(preset);
+    const podAffinity = this.#getProjectPodAffinity(projectId);
+
+    if (!nodeAffinity && !podAffinity) {
+      return undefined;
+    }
+
+    return {
+      ...(nodeAffinity && { nodeAffinity }),
+      ...(podAffinity && { podAffinity }),
+    };
+  }
+
+  #getNodeAffinityRules(preset: MachinePreset): k8s.V1NodeAffinity | undefined {
     if (!env.KUBERNETES_LARGE_MACHINE_POOL_LABEL) {
       return undefined;
     }
 
     if (this.#isLargeMachine(preset)) {
       // soft preference for the large-machine pool, falls back to standard if unavailable
       return {
-        nodeAffinity: {
-          preferredDuringSchedulingIgnoredDuringExecution: [
-            {
-              weight: 100,
-              preference: {
-                matchExpressions: [
-                  {
-                    key: "node.cluster.x-k8s.io/machinepool",
-                    operator: "In",
-                    values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
-                  },
-                ],
-              },
+        preferredDuringSchedulingIgnoredDuringExecution: [
+          {
+            weight: 100,
+            preference: {
+              matchExpressions: [
+                {
+                  key: "node.cluster.x-k8s.io/machinepool",
+                  operator: "In",
+                  values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
+                },
+              ],
             },
-          ],
-        },
+          },
+        ],
       };
     }
 
     // not schedulable in the large-machine pool
     return {
-      nodeAffinity: {
-        requiredDuringSchedulingIgnoredDuringExecution: {
-          nodeSelectorTerms: [
-            {
+      requiredDuringSchedulingIgnoredDuringExecution: {
+        nodeSelectorTerms: [
+          {
+            matchExpressions: [
+              {
+                key: "node.cluster.x-k8s.io/machinepool",
+                operator: "NotIn",
+                values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
+              },
+            ],
+          },
+        ],
+      },
+    };
+  }
+
+  #getProjectPodAffinity(projectId: string): k8s.V1PodAffinity | undefined {
+    if (!env.KUBERNETES_PROJECT_AFFINITY_ENABLED) {
+      return undefined;
+    }
+
+    return {
+      preferredDuringSchedulingIgnoredDuringExecution: [
+        {
+          weight: env.KUBERNETES_PROJECT_AFFINITY_WEIGHT,
+          podAffinityTerm: {
+            labelSelector: {
               matchExpressions: [
                 {
-                  key: "node.cluster.x-k8s.io/machinepool",
-                  operator: "NotIn",
-                  values: [env.KUBERNETES_LARGE_MACHINE_POOL_LABEL],
+                  key: "project",
+                  operator: "In",
+                  values: [projectId],
                 },
               ],
             },
-          ],
+            topologyKey: env.KUBERNETES_PROJECT_AFFINITY_TOPOLOGY_KEY,
+          },
         },
-      },
+      ],
     };
   }
 }