Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add support for controller-manager webhook shutdown delay #2601

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Prev Previous commit
Next Next commit
Add ShutdownDelay field
  • Loading branch information
dippynark committed Dec 3, 2023
commit f9393cbbe8a72def1e79e1f7f8056c9efacd943e
39 changes: 10 additions & 29 deletions pkg/manager/signals/signal.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,45 +20,26 @@ import (
"context"
"os"
"os/signal"
"time"
)

var (
onlyOneSignalHandler = make(chan struct{})
// Define global signal channel for testing
signalCh = make(chan os.Signal, 2)
)
var onlyOneSignalHandler = make(chan struct{})

// SetupSignalHandlerWithDelay registers for SIGTERM and SIGINT. A context is
// returned which is canceled on one of these signals after waiting for the
// specified delay. In particular, the delay can be used to give external
// Kubernetes controllers (such as kube-proxy) time to observe the termination
// of this manager before starting shutdown of any webhook servers to avoid
// receiving connection attempts after closing webhook listeners. If a second
// signal is caught, the program is terminated with exit code 1.
func SetupSignalHandlerWithDelay(delay time.Duration) context.Context {
// SetupSignalHandler registers for SIGTERM and SIGINT. A context is returned
// which is canceled on one of these signals. If a second signal is caught, the program
// is terminated with exit code 1.
func SetupSignalHandler() context.Context {
close(onlyOneSignalHandler) // panics when called twice

ctx, cancel := context.WithCancel(context.Background())

signal.Notify(signalCh, shutdownSignals...)
c := make(chan os.Signal, 2)
signal.Notify(c, shutdownSignals...)
go func() {
<-signalCh
// Cancel the context after delaying for the specified duration but
// avoid blocking if a second signal is caught
go func() {
<-time.After(delay)
cancel()
}()
<-signalCh
<-c
cancel()
<-c
os.Exit(1) // second signal. Exit directly.
}()

return ctx
}

// SetupSignalHandler is a special case of SetupSignalHandlerWithDelay with no
// delay for backwards compatibility
func SetupSignalHandler() context.Context {
return SetupSignalHandlerWithDelay(time.Duration(0))
}
61 changes: 47 additions & 14 deletions pkg/manager/signals/signal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ limitations under the License.
package signals

import (
"fmt"
"os"
"os/signal"
"sync"
"time"

. "github.com/onsi/ginkgo/v2"
Expand All @@ -28,23 +31,53 @@ var _ = Describe("runtime signal", func() {

Context("SignalHandler Test", func() {

It("test signal handler with delay", func() {
delay := time.Second
ctx := SetupSignalHandlerWithDelay(delay)
It("test signal handler", func() {
ctx := SetupSignalHandler()
task := &Task{
ticker: time.NewTicker(time.Second * 2),
}
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt)
task.wg.Add(1)
go func(c chan os.Signal) {
defer task.wg.Done()
task.Run(c)
}(c)

// Save time before sending signal
beforeSendingSignal := time.Now()

// Send signal
signalCh <- os.Interrupt

_, ok := <-ctx.Done()
// Verify that the channel was closed
Expect(ok).To(BeFalse())
// Verify that our delay was respected
Expect(time.Since(beforeSendingSignal)).To(BeNumerically(">=", delay))
select {
case sig := <-c:
fmt.Printf("Got %s signal. Aborting...\n", sig)
case _, ok := <-ctx.Done():
Expect(ok).To(BeFalse())
}
})

})

})

type Task struct {
wg sync.WaitGroup
ticker *time.Ticker
}

func (t *Task) Run(c chan os.Signal) {
for {
go sendSignal(c)
handle()
}
}

func handle() {
for i := 0; i < 5; i++ {
fmt.Print("#")
time.Sleep(time.Millisecond * 100)
}
fmt.Println()
}

func sendSignal(stopChan chan os.Signal) {
fmt.Printf("...")
time.Sleep(1 * time.Second)
stopChan <- os.Interrupt
}
15 changes: 14 additions & 1 deletion pkg/webhook/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ type Options struct {

// WebhookMux is the multiplexer that handles different webhooks.
WebhookMux *http.ServeMux

// ShutdownDelay delays server shutdown to wait for clients to stop opening
// new connections before closing server listeners. HTTP keep-alives are
// disabled during this time to allow persistent connections to be closed
// gracefully. Defaults to 0.
ShutdownDelay time.Duration
}

// NewServer constructs a new webhook.Server from the provided options.
Expand Down Expand Up @@ -247,9 +253,16 @@ func (s *DefaultServer) Start(ctx context.Context) error {
go func() {
<-ctx.Done()
log.Info("Shutting down webhook server with timeout of 1 minute")

ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
// Disable HTTP keep-alives to close persistent connections gracefully
srv.SetKeepAlivesEnabled(false)
// Wait for the specified shutdown delay or until the shutdown context
// expires, whichever happens first
select {
case <-time.After(s.Options.ShutdownDelay):
case <-ctx.Done():
Copy link
Member

@inteon inteon Dec 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This construction basically means that ShutdownDelay should not be longer than 1 minute, right?
Maybe that is something we can enforce & document in the comments linked to this field?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm yeah I don't think it's great being capped at 1 minute. Does it look better now? I've changed it to always wait for the full shutdown delay

}
if err := srv.Shutdown(ctx); err != nil {
// Error from closing listeners, or context timeout
log.Error(err, "error shutting down the HTTP server")
Expand Down