Skip to content

Commit

Permalink
Improve config validation health checking (istio#8118)
Browse files Browse the repository at this point in the history
* Improve config validation health checks (istio#7605)

Readiness and liveliness probes were failing on some providers because
GET requests were blocking for multiple seconds. Mitigate the issue
by decreasing the frequency of GET (to avoid possible throttling) and
increase the acceptable health check interval from 4s to 10s.

Shorm term fix for istio#7586. Longer
term fix requires switching to proper controller-style
reconciliation. That work should be aligned with the sidecar injector.

* decouple validation webhook's health checking and reconciliation loop (istio#7986)

The validation webhook's health file updates and configuration
reconciliation were invoked from the same goroutine. Delays in
checking and updating the k8s configuration could result in the health
file not being updated in time to pass the health checks. Use
istio.io/istio/pkg/probe to decouple the two code paths.
  • Loading branch information
ayj authored and rshriram committed Aug 22, 2018
1 parent 6096daf commit 6acd9fd
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 21 deletions.
36 changes: 23 additions & 13 deletions galley/pkg/crd/validation/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package validation
import (
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"net/http"
Expand All @@ -26,6 +27,7 @@ import (

"github.com/ghodss/yaml"
"github.com/howeyc/fsnotify"

admissionv1beta1 "k8s.io/api/admission/v1beta1"
"k8s.io/api/admissionregistration/v1beta1"
extensionsv1beta1 "k8s.io/api/extensions/v1beta1"
Expand All @@ -41,6 +43,7 @@ import (
"istio.io/istio/pilot/pkg/config/kube/crd"
"istio.io/istio/pilot/pkg/model"
"istio.io/istio/pkg/log"
"istio.io/istio/pkg/probe"
)

var (
Expand All @@ -51,6 +54,7 @@ var (

const (
watchDebounceDelay = 100 * time.Millisecond
reconcilePeriod = 5 * time.Second
)

// WebhookParameters contains the configuration for the Istio Pilot validation
Expand Down Expand Up @@ -117,6 +121,8 @@ type Webhook struct {
// mixer
validator store.BackendValidator

healthProbe *probe.Probe
healthController probe.Controller
healthCheckInterval time.Duration
healthCheckFile string
server *http.Server
Expand Down Expand Up @@ -187,6 +193,17 @@ func NewWebhook(p WebhookParameters) (*Webhook, error) {
clientset: p.Clientset,
deploymentName: p.DeploymentName,
deploymentNamespace: p.DeploymentNamespace,
healthProbe: probe.NewProbe(),
healthController: probe.NewFileController(&probe.Options{
Path: p.HealthCheckFile,
UpdateInterval: p.HealthCheckInterval,
}),
}

if wh.healthCheckInterval != 0 && wh.healthCheckFile != "" {
wh.healthProbe.RegisterProbe(wh.healthController, "validation")
wh.healthProbe.SetAvailable(errors.New("not ready"))
wh.healthController.Start()
}

if galleyDeployment, err := wh.clientset.ExtensionsV1beta1().Deployments(wh.deploymentNamespace).Get(wh.deploymentName, v1.GetOptions{}); err != nil { // nolint: lll
Expand Down Expand Up @@ -226,13 +243,6 @@ func (wh *Webhook) Run(stop <-chan struct{}) {
}()
defer wh.stop()

var healthC <-chan time.Time
if wh.healthCheckInterval != 0 && wh.healthCheckFile != "" {
t := time.NewTicker(wh.healthCheckInterval)
healthC = t.C
defer t.Stop()
}

// use a timer to debounce file updates
var keyCertTimerC <-chan time.Time
var configTimerC <-chan time.Time
Expand All @@ -245,7 +255,12 @@ func (wh *Webhook) Run(stop <-chan struct{}) {

var reconcileTickerC <-chan time.Time
if wh.webhookConfigFile != "" {
reconcileTickerC = time.NewTicker(time.Second).C
reconcileTickerC = time.NewTicker(reconcilePeriod).C
}

if wh.healthCheckInterval != 0 && wh.healthCheckFile != "" {
wh.healthProbe.SetAvailable(nil)
defer wh.healthController.Close()
}

for {
Expand Down Expand Up @@ -278,11 +293,6 @@ func (wh *Webhook) Run(stop <-chan struct{}) {
log.Errorf("keyCertWatcher error: %v", err)
case err := <-wh.configWatcher.Error:
log.Errorf("configWatcher error: %v", err)
case <-healthC:
content := []byte(`ok`)
if err := ioutil.WriteFile(wh.healthCheckFile, content, 0644); err != nil {
log.Errorf("Health check update of %q failed: %v", wh.healthCheckFile, err)
}
case <-stop:
return
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ spec:
- --caCertFile=/etc/istio/certs/root-cert.pem
- --tlsCertFile=/etc/istio/certs/cert-chain.pem
- --tlsKeyFile=/etc/istio/certs/key.pem
- --healthCheckInterval=2s
- --healthCheckInterval=1s
- --healthCheckFile=/health
- --webhook-config-file
- /etc/istio/config/validatingwebhookconfiguration.yaml
Expand All @@ -56,18 +56,18 @@ spec:
- /usr/local/bin/galley
- probe
- --probe-path=/health
- --interval=4s
initialDelaySeconds: 4
periodSeconds: 4
- --interval=10s
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
exec:
command:
- /usr/local/bin/galley
- probe
- --probe-path=/health
- --interval=4s
initialDelaySeconds: 4
periodSeconds: 4
- --interval=10s
initialDelaySeconds: 5
periodSeconds: 5
- name: server
image: "{{ .Values.global.hub }}/{{ .Values.image }}:{{ .Values.global.tag }}"
imagePullPolicy: {{ .Values.global.imagePullPolicy }}
Expand Down Expand Up @@ -101,4 +101,4 @@ spec:
configMap:
name: istio-galley-configuration
affinity:
{{- include "nodeaffinity" . | indent 6 }}
{{- include "nodeaffinity" . | indent 6 }}

0 comments on commit 6acd9fd

Please sign in to comment.