kubernetes · xigang · Jun 14, 2025 · mimowo · Jun 17, 2025
diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go
@@ -109,6 +109,9 @@ type Controller struct {
 	// A store of pods, populated by the podController
 	podStore corelisters.PodLister
 
+	// podIndexer allows looking up pods by ControllerRef UID
+	podIndexer cache.Indexer
+
 	// Jobs that need to be updated
 	queue workqueue.TypedRateLimitingInterface[string]
 
@@ -223,6 +226,9 @@ func newControllerWithClock(ctx context.Context, podInformer coreinformers.PodIn
 	jm.podStore = podInformer.Lister()
 	jm.podStoreSynced = podInformer.Informer().HasSynced
 
+	controller.AddPodControllerUIDIndexer(podInformer.Informer())
+	jm.podIndexer = podInformer.Informer().GetIndexer()
+
 	jm.updateStatusHandler = jm.updateJobStatus
 	jm.patchJobHandler = jm.patchJob
 	jm.syncHandler = jm.syncJob
@@ -758,12 +764,13 @@ func (jm *Controller) getPodsForJob(ctx context.Context, j *batch.Job) ([]*v1.Po
 	if err != nil {
 		return nil, fmt.Errorf("couldn't convert Job selector: %v", err)
 	}
-	// List all pods to include those that don't match the selector anymore
-	// but have a ControllerRef pointing to this controller.
-	pods, err := jm.podStore.Pods(j.Namespace).List(labels.Everything())
+
+	// List this Job's pods and orphaned pods (adoption candidates) via the pod indexer.
+	pods, err := jm.getJobPodsByIndexer(ctx, j)
 	if err != nil {
 		return nil, err
 	}
+
 	// If any adoptions are attempted, we should first recheck for deletion
 	// with an uncached quorum read sometime after listing Pods (see #42639).
 	canAdoptFunc := controller.RecheckDeletionTimestamp(func(ctx context.Context) (metav1.Object, error) {
@@ -799,6 +806,27 @@ func (jm *Controller) getPodsForJob(ctx context.Context, j *batch.Job) ([]*v1.Po
 	return pods, err
 }
 
+// getJobPodsByIndexer returns the pods in the Job's namespace that the pod
+// indexer lists under the Job's UID or under the orphan pod key.
+func (jm *Controller) getJobPodsByIndexer(ctx context.Context, j *batch.Job) ([]*v1.Pod, error) {
+	podsForJob := []*v1.Pod{}
+	for _, key := range []string{string(j.UID), controller.OrphanPodIndexKey} {
+		objs, err := jm.podIndexer.ByIndex(controller.PodControllerUIDIndex, key)
+		if err != nil {
+			return nil, err
+		}
+		for _, obj := range objs {
+			// ByIndex is not scoped to a namespace; keep only pods in the Job's namespace.
+			if pod, ok := obj.(*v1.Pod); !ok {
+				utilruntime.HandleError(fmt.Errorf("unexpected object type in pod indexer: %v", obj))
+			} else if pod.Namespace == j.Namespace {
+				podsForJob = append(podsForJob, pod)
+			}
+		}
+	}
+	return podsForJob, nil
+}
+
 // syncJob will sync the job with the given key if it has had its expectations fulfilled, meaning
 // it did not expect to see any more of its pods created or deleted. This function is not meant to be invoked
 // concurrently with the same key.