Skip to content

fix: multi controller run concurrently after leadership lost #2309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 11, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions pkg/controller/main-controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,12 +492,12 @@ func leaderRun(ctx context.Context, c *Controller, threadiness int, stopCh <-cha
for {
select {
case oerr := <-notificationChannel:
if !errors.Is(oerr.Err, http.ErrServerClosed) {
if oerr != nil && !errors.Is(oerr.Err, http.ErrServerClosed) {
klog.Errorf("STS API Server stopped: %v, going to restart", oerr.Err)
go c.startSTSAPIServer(ctx, notificationChannel)
}
case err := <-upgradeServerChannel:
if err != http.ErrServerClosed {
if err != nil && !errors.Is(err, http.ErrServerClosed) {
klog.Errorf("Upgrade Server stopped: %v, going to restart", err)
upgradeServerChannel = c.startUpgradeServer()
}
Expand Down Expand Up @@ -584,8 +584,24 @@ func (c *Controller) Start(threadiness int, stopCh <-chan struct{}) error {
leaderRun(ctx, c, threadiness, stopCh, notificationChannel)
},
OnStoppedLeading: func() {
// we can do cleanup here
klog.Infof("leader lost: %s", c.podName)
klog.Infof("leader lost, removing any leader labels that I '%s' might have", c.podName)
p := []patchAnnotation{{
Op: "remove",
Path: "/metadata/labels/operator",
}}

payloadBytes, err := json.Marshal(p)
if err != nil {
klog.Errorf("failed to marshal patch: %#v", err)
} else {
c.kubeClientSet.CoreV1().Pods(leaseLockNamespace).Patch(ctx, c.podName, types.JSONPatchType, payloadBytes, metav1.PatchOptions{})
}
// Even if Stop() is called twice, stopping it here ensures the sync handler is no longer handling events,
// in case SIGTERM fails or the controller takes longer to exit.
c.Stop()
if err := syscall.Kill(os.Getpid(), syscall.SIGTERM); err != nil {
klog.Errorf("error sending SIGTERM: %v", err)
}
},
OnNewLeader: func(identity string) {
// we're notified when a new leader is elected
Expand Down
Loading