Skip to content

Commit

Permalink
Clean up WorkersToDelete field during the CI test (ray-project#1763)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yicheng-Lu-llll authored Dec 21, 2023
1 parent e11a9b7 commit 046d4c4
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
25 changes: 15 additions & 10 deletions ray-operator/controllers/ray/raycluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,16 +244,8 @@ var _ = Context("Inside the default namespace", func() {
Eventually(
listResourceFunc(ctx, &workerPods, workerFilterLabels, &client.ListOptions{Namespace: "default"}),
time.Second*15, time.Millisecond*500).Should(Equal(2), fmt.Sprintf("workerGroup %v", workerPods.Items))
// Updating WorkersToDelete is the responsibility of the Ray Autoscaler. Here, we simulate the Ray
// Autoscaler's behavior after the scale-down process is completed.
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
Eventually(
getResourceFunc(ctx, client.ObjectKey{Name: myRayCluster.Name, Namespace: "default"}, myRayCluster),
time.Second*9, time.Millisecond*500).Should(BeNil(), "My raycluster = %v", myRayCluster)
myRayCluster.Spec.WorkerGroupSpecs[0].ScaleStrategy.WorkersToDelete = []string{}
return k8sClient.Update(ctx, myRayCluster)
})
Expect(err).NotTo(HaveOccurred(), "failed to update test RayCluster resource")

cleanUpWorkersToDelete(ctx, myRayCluster, 0)
})

It("should increase replicas past maxReplicas", func() {
Expand Down Expand Up @@ -450,3 +442,16 @@ func isAllPodsRunning(ctx context.Context, podlist corev1.PodList, filterLabels
}
return true
}

func cleanUpWorkersToDelete(ctx context.Context, rayCluster *rayv1.RayCluster, workerGroupIndex int) {
// Updating WorkersToDelete is the responsibility of the Ray Autoscaler. In this function,
// we simulate the behavior of the Ray Autoscaler after the scaling process has finished.
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
Eventually(
getResourceFunc(ctx, client.ObjectKey{Name: rayCluster.Name, Namespace: "default"}, rayCluster),
time.Second*9, time.Millisecond*500).Should(BeNil(), "raycluster = %v", rayCluster)
rayCluster.Spec.WorkerGroupSpecs[workerGroupIndex].ScaleStrategy.WorkersToDelete = []string{}
return k8sClient.Update(ctx, rayCluster)
})
Expect(err).NotTo(HaveOccurred(), "failed to clean up WorkersToDelete")
}
4 changes: 4 additions & 0 deletions ray-operator/controllers/ray/rayservice_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ applications:
Eventually(
getResourceFunc(ctx, client.ObjectKey{Name: myRayService.Status.ActiveServiceStatus.RayClusterName, Namespace: "default"}, myRayCluster),
time.Second*3, time.Millisecond*500).Should(BeNil(), "My myRayCluster = %v", myRayCluster.Name)

cleanUpWorkersToDelete(ctx, myRayCluster, 0)
})

It("Autoscaler updates the pending RayCluster and should not switch to a new RayCluster", func() {
Expand Down Expand Up @@ -482,6 +484,8 @@ applications:
Eventually(
getRayClusterNameFunc(ctx, myRayService),
time.Second*15, time.Millisecond*500).Should(Equal(initialPendingClusterName), "New active RayCluster name = %v", myRayService.Status.ActiveServiceStatus.RayClusterName)

cleanUpWorkersToDelete(ctx, myRayCluster, 0)
})

It("Status should be updated if the differences are not only LastUpdateTime and HealthLastUpdateTime fields.", func() {
Expand Down

0 comments on commit 046d4c4

Please sign in to comment.