Skip to content

Commit

Permalink
Recover panic for controller reconcile and webhook handler (kubernete…
Browse files Browse the repository at this point in the history
…s-sigs#1627)

Signed-off-by: FillZpp <FillZpp.pub@gmail.com>
  • Loading branch information
FillZpp authored and tamalsaha committed Feb 12, 2022
1 parent 0e7a1aa commit 811f7a3
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 20 deletions.
4 changes: 4 additions & 0 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ type Options struct {
// CacheSyncTimeout refers to the time limit set to wait for syncing caches.
// Defaults to 2 minutes if not set.
CacheSyncTimeout time.Duration

// RecoverPanic indicates whether the panic caused by reconcile should be recovered.
RecoverPanic bool
}

// Controller implements a Kubernetes API. A Controller manages a work queue fed reconcile.Requests
Expand Down Expand Up @@ -133,5 +136,6 @@ func NewUnmanaged(name string, mgr manager.Manager, options Options) (Controller
SetFields: mgr.SetFields,
Name: name,
Log: options.Log.WithName("controller").WithName(name),
RecoverPanic: options.RecoverPanic,
}, nil
}
51 changes: 31 additions & 20 deletions pkg/internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (

var _ inject.Injector = &Controller{}

// Controller implements controller.Controller
// Controller implements controller.Controller.
type Controller struct {
// Name is used to uniquely identify a Controller in tracing, logging and monitoring. Name is required.
Name string
Expand Down Expand Up @@ -85,6 +85,9 @@ type Controller struct {

// Log is used to log messages to users during reconciliation, or for example when a watch is started.
Log logr.Logger

// RecoverPanic indicates whether the panic caused by reconcile should be recovered.
RecoverPanic bool
}

// watchDescription contains all the information necessary to start a watch.
Expand All @@ -94,14 +97,24 @@ type watchDescription struct {
predicates []predicate.Predicate
}

// Reconcile implements reconcile.Reconciler
func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
// Reconcile implements reconcile.Reconciler.
func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
if c.RecoverPanic {
defer func() {
if r := recover(); r != nil {
for _, fn := range utilruntime.PanicHandlers {
fn(r)
}
err = fmt.Errorf("panic: %v [recovered]", r)
}
}()
}
log := c.Log.WithValues("name", req.Name, "namespace", req.Namespace)
ctx = logf.IntoContext(ctx, log)
return c.Do.Reconcile(ctx, req)
}

// Watch implements controller.Controller
// Watch implements controller.Controller.
func (c *Controller) Watch(src source.Source, evthdler handler.EventHandler, prct ...predicate.Predicate) error {
c.mu.Lock()
defer c.mu.Unlock()
Expand Down Expand Up @@ -131,7 +144,7 @@ func (c *Controller) Watch(src source.Source, evthdler handler.EventHandler, prc
return src.Start(c.ctx, evthdler, c.Queue, prct...)
}

// Start implements controller.Controller
// Start implements controller.Controller.
func (c *Controller) Start(ctx context.Context) error {
// use an IIFE to get proper lock handling
// but lock outside to get proper handling of the queue shutdown
Expand Down Expand Up @@ -162,7 +175,7 @@ func (c *Controller) Start(ctx context.Context) error {
// caches to sync so that they have a chance to register their intendeded
// caches.
for _, watch := range c.startWatches {
c.Log.Info("Starting EventSource", "source", watch.src)
c.Log.Info("Starting EventSource", "source", fmt.Sprintf("%s", watch.src))

if err := watch.src.Start(ctx, watch.handler, c.Queue, watch.predicates...); err != nil {
return err
Expand Down Expand Up @@ -295,46 +308,44 @@ func (c *Controller) reconcileHandler(ctx context.Context, obj interface{}) {

// RunInformersAndControllers the syncHandler, passing it the Namespace/Name string of the
// resource to be synced.
if result, err := c.Do.Reconcile(ctx, req); err != nil {
result, err := c.Reconcile(ctx, req)
switch {
case err != nil:
c.Queue.AddRateLimited(req)
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Inc()
log.Error(err, "Reconciler error")
return
} else if result.RequeueAfter > 0 {
case result.RequeueAfter > 0:
// The result.RequeueAfter request will be lost, if it is returned
// along with a non-nil error. But this is intended as
// We need to drive to stable reconcile loops before queuing due
// to result.RequestAfter
c.Queue.Forget(obj)
c.Queue.AddAfter(req, result.RequeueAfter)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeueAfter).Inc()
return
} else if result.Requeue {
case result.Requeue:
c.Queue.AddRateLimited(req)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelRequeue).Inc()
return
default:
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
c.Queue.Forget(obj)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelSuccess).Inc()
}

// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
c.Queue.Forget(obj)

ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelSuccess).Inc()
}

// GetLogger returns this controller's logger.
func (c *Controller) GetLogger() logr.Logger {
return c.Log
}

// InjectFunc implement SetFields.Injector
// InjectFunc implement SetFields.Injector.
func (c *Controller) InjectFunc(f inject.Func) error {
c.SetFields = f
return nil
}

// updateMetrics updates prometheus metrics within the controller
// updateMetrics updates prometheus metrics within the controller.
func (c *Controller) updateMetrics(reconcileTime time.Duration) {
ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds())
}
33 changes: 33 additions & 0 deletions pkg/internal/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,39 @@ var _ = Describe("controller", func() {
Expect(err).NotTo(HaveOccurred())
Expect(result).To(Equal(reconcile.Result{Requeue: true}))
})

It("should not recover panic if RecoverPanic is false by default", func() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

defer func() {
Expect(recover()).ShouldNot(BeNil())
}()
ctrl.Do = reconcile.Func(func(context.Context, reconcile.Request) (reconcile.Result, error) {
var res *reconcile.Result
return *res, nil
})
_, _ = ctrl.Reconcile(ctx,
reconcile.Request{NamespacedName: types.NamespacedName{Namespace: "foo", Name: "bar"}})
})

It("should recover panic if RecoverPanic is true", func() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

defer func() {
Expect(recover()).To(BeNil())
}()
ctrl.RecoverPanic = true
ctrl.Do = reconcile.Func(func(context.Context, reconcile.Request) (reconcile.Result, error) {
var res *reconcile.Result
return *res, nil
})
_, err := ctrl.Reconcile(ctx,
reconcile.Request{NamespacedName: types.NamespacedName{Namespace: "foo", Name: "bar"}})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("[recovered]"))
})
})

Describe("Start", func() {
Expand Down

0 comments on commit 811f7a3

Please sign in to comment.