Skip to content

Commit

Permalink
Add statefulset re-creation metric
Browse files (browse the repository at this point in the history)
  • Loading branch information
vsliouniaev committed Nov 15, 2019
1 parent 5e4df96 commit e263319
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 3 deletions.
10 changes: 9 additions & 1 deletion cmd/operator/main.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -233,7 +233,7 @@ func Main() int {

reconcileErrorsCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_operator_reconcile_errors_total",
Help: "Number of errors that occurred while reconciling the alertmanager statefulset",
Help: "Number of errors that occurred while reconciling the statefulset",
}, []string{"controller"})

triggerByCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
Expand All @@ -242,6 +242,11 @@ func Main() int {
" triggered the Prometheus Operator to reconcile an object",
}, []string{"controller", "triggered_by", "action"})

stsDeleteCreateCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_operator_reconcile_sts_delete_create_total",
Help: "Number of times that reconciling a statefulset required deleting and re-creating it",
}, []string{"controller"})

validationTriggeredCounter := prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_operator_rule_validation_triggered_total",
Help: "Number of times a prometheusRule object triggered validation",
Expand All @@ -258,6 +263,7 @@ func Main() int {
prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}),
reconcileErrorsCounter,
triggerByCounter,
stsDeleteCreateCounter,
validationTriggeredCounter,
validationErrorsCounter,
)
Expand All @@ -272,13 +278,15 @@ func Main() int {
prometheus.WrapRegistererWith(prometheusLabels, r),
reconcileErrorsCounter.MustCurryWith(prometheusLabels),
triggerByCounter.MustCurryWith(prometheusLabels),
stsDeleteCreateCounter.MustCurryWith(prometheusLabels),
)

alertmanagerLabels := prometheus.Labels{"controller": "alertmanager"}
ao.RegisterMetrics(
prometheus.WrapRegistererWith(alertmanagerLabels, r),
reconcileErrorsCounter.MustCurryWith(alertmanagerLabels),
triggerByCounter.MustCurryWith(alertmanagerLabels),
stsDeleteCreateCounter.MustCurryWith(alertmanagerLabels),
)

mux.Handle("/metrics", promhttp.HandlerFor(r, promhttp.HandlerOpts{}))
Expand Down
5 changes: 4 additions & 1 deletion pkg/alertmanager/operator.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,6 +67,7 @@ type Operator struct {

reconcileErrorsCounter *prometheus.CounterVec
triggerByCounter *prometheus.CounterVec
stsDeleteCreateCounter *prometheus.CounterVec

config Config
}
Expand Down Expand Up @@ -155,9 +156,10 @@ func New(c prometheusoperator.Config, logger log.Logger) (*Operator, error) {
return o, nil
}

func (c *Operator) RegisterMetrics(r prometheus.Registerer, reconcileErrorsCounter *prometheus.CounterVec, triggerByCounter *prometheus.CounterVec) {
func (c *Operator) RegisterMetrics(r prometheus.Registerer, reconcileErrorsCounter *prometheus.CounterVec, triggerByCounter *prometheus.CounterVec, stsDeleteCreateCounter *prometheus.CounterVec) {
c.reconcileErrorsCounter = reconcileErrorsCounter
c.triggerByCounter = triggerByCounter
c.stsDeleteCreateCounter = stsDeleteCreateCounter

c.reconcileErrorsCounter.With(prometheus.Labels{}).Add(0)

Expand Down Expand Up @@ -494,6 +496,7 @@ func (c *Operator) sync(key string) error {
sErr, ok := err.(*apierrors.StatusError)

if ok && sErr.ErrStatus.Code == 422 && sErr.ErrStatus.Reason == metav1.StatusReasonInvalid {
c.stsDeleteCreateCounter.With(prometheus.Labels{}).Inc()
level.Debug(c.logger).Log("msg", "resolving illegal update of Alertmanager StatefulSet")
propagationPolicy := metav1.DeletePropagationForeground
if err := ssetClient.Delete(sset.GetName(), &metav1.DeleteOptions{PropagationPolicy: &propagationPolicy}); err != nil {
Expand Down
5 changes: 4 additions & 1 deletion pkg/prometheus/operator.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,6 +76,7 @@ type Operator struct {
queue workqueue.RateLimitingInterface

reconcileErrorsCounter *prometheus.CounterVec
stsDeleteCreateCounter *prometheus.CounterVec

// triggerByCounter is a set of counters keeping track of the amount
// of times Prometheus Operator was triggered to reconcile its created
Expand Down Expand Up @@ -340,9 +341,10 @@ func New(conf Config, logger log.Logger) (*Operator, error) {

// RegisterMetrics registers Prometheus metrics on the given Prometheus
// registerer.
func (c *Operator) RegisterMetrics(r prometheus.Registerer, reconcileErrorsCounter *prometheus.CounterVec, triggerByCounter *prometheus.CounterVec) {
func (c *Operator) RegisterMetrics(r prometheus.Registerer, reconcileErrorsCounter *prometheus.CounterVec, triggerByCounter *prometheus.CounterVec, stsDeleteCreateCounter *prometheus.CounterVec) {
c.reconcileErrorsCounter = reconcileErrorsCounter
c.triggerByCounter = triggerByCounter
c.stsDeleteCreateCounter = stsDeleteCreateCounter

c.reconcileErrorsCounter.With(prometheus.Labels{}).Add(0)

Expand Down Expand Up @@ -1154,6 +1156,7 @@ func (c *Operator) sync(key string) error {
sErr, ok := err.(*apierrors.StatusError)

if ok && sErr.ErrStatus.Code == 422 && sErr.ErrStatus.Reason == metav1.StatusReasonInvalid {
c.stsDeleteCreateCounter.With(prometheus.Labels{}).Inc()
level.Debug(c.logger).Log("msg", "resolving illegal update of Prometheus StatefulSet")
propagationPolicy := metav1.DeletePropagationForeground
if err := ssetClient.Delete(sset.GetName(), &metav1.DeleteOptions{PropagationPolicy: &propagationPolicy}); err != nil {
Expand Down

0 comments on commit e263319

Please sign in to comment.