From 0b82ac66e7fe430a6ccc0fe0c500da4526df41bd Mon Sep 17 00:00:00 2001 From: Jeev B Date: Mon, 28 Jan 2019 02:51:27 -0800 Subject: [PATCH] [stable/node-problem-detector] User-specified custom monitor definitions (#10908) * Add provision for specifying custom monitor definitions for Node Problem Detector. Signed-off-by: Jeev B * Update Chart.yaml Signed-off-by: David J. M. Karlsen --- stable/node-problem-detector/Chart.yaml | 2 +- stable/node-problem-detector/README.md | 35 +++---- .../templates/_helpers.tpl | 8 ++ .../templates/configmap.yaml | 94 ------------------- .../templates/custom-config-configmap.yaml | 11 +++ .../templates/daemonset.yaml | 17 ++-- stable/node-problem-detector/values.yaml | 33 ++++++- 7 files changed, 75 insertions(+), 125 deletions(-) delete mode 100644 stable/node-problem-detector/templates/configmap.yaml create mode 100644 stable/node-problem-detector/templates/custom-config-configmap.yaml diff --git a/stable/node-problem-detector/Chart.yaml b/stable/node-problem-detector/Chart.yaml index 37d5de151efa..1a11ea61d4a4 100644 --- a/stable/node-problem-detector/Chart.yaml +++ b/stable/node-problem-detector/Chart.yaml @@ -1,5 +1,5 @@ name: node-problem-detector -version: "1.2.0" +version: "1.3.0" appVersion: v0.6.1 home: https://github.com/kubernetes/node-problem-detector description: Installs the node-problem-detector daemonset for monitoring extra attributes on nodes diff --git a/stable/node-problem-detector/README.md b/stable/node-problem-detector/README.md index f5cf61bbf31c..62202389c73c 100644 --- a/stable/node-problem-detector/README.md +++ b/stable/node-problem-detector/README.md @@ -34,23 +34,24 @@ Custom System log monitor config files can be created, see [here](https://github The following table lists the configurable parameters for this chart and their default values. 
-| Parameter | Description | Default | -|-----------------------------------|--------------------------------------------|--------------------------------------------------------------| -| `affinity` | Map of node/pod affinities | `{}` | -| `annotations` | Optional daemonset annotations | `{}` | -| `fullnameOverride` | Override the fullname of the chart | `nil` | -| `image.pullPolicy` | Image pull policy | `IfNotPresent` | -| `image.repository` | Image | `k8s.gcr.io/node-problem-detector` | -| `image.tag` | Image tag | `v0.6.1` | -| `nameOverride` | Override the name of the chart | `nil` | -| `rbac.create` | RBAC | `true` | -| `hostNetwork` | Run pod on host network | `false` | -| `resources` | Pod resource requests and limits | `{}` | -| `settings.log_monitors` | System log monitor config files | `/config/kernel-monitor.json`, `/config/docker-monitor.json` | -| `settings.custom_plugin_monitors` | Custom plugin monitor config files | `[]` | -| `serviceAccount.create` | Whether a ServiceAccount should be created | `true` | -| `serviceAccount.name` | Name of the ServiceAccount to create | Generated value from template | -| `tolerations` | Optional daemonset tolerations | `[]` | +| Parameter | Description | Default | +|---------------------------------------|--------------------------------------------|--------------------------------------------------------------| +| `affinity` | Map of node/pod affinities | `{}` | +| `annotations` | Optional daemonset annotations | `{}` | +| `fullnameOverride` | Override the fullname of the chart | `nil` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `image.repository` | Image | `k8s.gcr.io/node-problem-detector` | +| `image.tag` | Image tag | `v0.6.1` | +| `nameOverride` | Override the name of the chart | `nil` | +| `rbac.create` | RBAC | `true` | +| `hostNetwork` | Run pod on host network | `false` | +| `resources` | Pod resource requests and limits | `{}` | +| `settings.custom_monitor_definitions` | 
User-specified custom monitor definitions | `{}` | +| `settings.log_monitors` | System log monitor config files | `/config/kernel-monitor.json`, `/config/docker-monitor.json` | +| `settings.custom_plugin_monitors` | Custom plugin monitor config files | `[]` | +| `serviceAccount.create` | Whether a ServiceAccount should be created | `true` | +| `serviceAccount.name` | Name of the ServiceAccount to create | Generated value from template | +| `tolerations` | Optional daemonset tolerations | `[]` | Specify each parameter using the `--set key=value[,key=value]` argument to `helm install` or provide a YAML file containing the values for the above parameters: diff --git a/stable/node-problem-detector/templates/_helpers.tpl b/stable/node-problem-detector/templates/_helpers.tpl index f47c7e7d165a..25077d3171ee 100644 --- a/stable/node-problem-detector/templates/_helpers.tpl +++ b/stable/node-problem-detector/templates/_helpers.tpl @@ -40,3 +40,11 @@ Create chart name and version as used by the chart label. {{ default "default" .Values.serviceAccount.name }} {{- end -}} {{- end -}} + +{{/* +Create the name of the configmap for storing custom monitor definitions +*/}} +{{- define "node-problem-detector.customConfig" -}} +{{- $fullname := include "node-problem-detector.fullname" . 
-}} +{{- printf "%s-custom-config" $fullname | replace "+" "_" | trunc 63 -}} +{{- end -}} diff --git a/stable/node-problem-detector/templates/configmap.yaml b/stable/node-problem-detector/templates/configmap.yaml deleted file mode 100644 index 14300fd4852b..000000000000 --- a/stable/node-problem-detector/templates/configmap.yaml +++ /dev/null @@ -1,94 +0,0 @@ -apiVersion: v1 -data: - kernel-monitor.json: | - { - "plugin": "kmsg", - "logPath": "/dev/kmsg", - "lookback": "5m", - "bufferSize": 10, - "source": "kernel-monitor", - "conditions": [ - { - "type": "KernelDeadlock", - "reason": "KernelHasNoDeadlock", - "message": "kernel has no deadlock" - }, - { - "type": "ReadonlyFilesystem", - "reason": "FilesystemIsReadOnly", - "message": "Filesystem is read-only" - } - ], - "rules": [ - { - "type": "temporary", - "reason": "OOMKilling", - "pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*" - }, - { - "type": "temporary", - "reason": "TaskHung", - "pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\." - }, - { - "type": "temporary", - "reason": "UnregisterNetDevice", - "pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+" - }, - { - "type": "temporary", - "reason": "KernelOops", - "pattern": "BUG: unable to handle kernel NULL pointer dereference at .*" - }, - { - "type": "temporary", - "reason": "KernelOops", - "pattern": "divide error: 0000 \\[#\\d+\\] SMP" - }, - { - "type": "permanent", - "condition": "KernelDeadlock", - "reason": "AUFSUmountHung", - "pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\." - }, - { - "type": "permanent", - "condition": "KernelDeadlock", - "reason": "DockerHung", - "pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\." 
- }, - { - "type": "permanent", - "condition": "ReadonlyFilesystem", - "reason": "FilesystemIsReadOnly", - "pattern": "Remounting filesystem read-only" - } - ] - } - docker-monitor.json: | - { - "plugin": "journald", - "pluginConfig": { - "source": "dockerd" - }, - "logPath": "/var/log/journal", - "lookback": "5m", - "bufferSize": 10, - "source": "docker-monitor", - "conditions": [], - "rules": [ - { - "type": "temporary", - "reason": "CorruptDockerImage", - "pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*" - } - ] - } -kind: ConfigMap -metadata: - name: {{ include "node-problem-detector.fullname" . }} - labels: - app.kubernetes.io/name: {{ include "node-problem-detector.name" . }} - helm.sh/chart: {{ include "node-problem-detector.chart" . }} - app.kubernetes.io/instance: {{ .Release.Name }} - app.kubernetes.io/managed-by: {{ .Release.Service }} diff --git a/stable/node-problem-detector/templates/custom-config-configmap.yaml b/stable/node-problem-detector/templates/custom-config-configmap.yaml new file mode 100644 index 000000000000..edf754c9473c --- /dev/null +++ b/stable/node-problem-detector/templates/custom-config-configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +data: +{{ .Values.settings.custom_monitor_definitions | toYaml | indent 2 }} +kind: ConfigMap +metadata: + name: {{ include "node-problem-detector.customConfig" . }} + labels: + app.kubernetes.io/name: {{ include "node-problem-detector.name" . }} + helm.sh/chart: {{ include "node-problem-detector.chart" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} diff --git a/stable/node-problem-detector/templates/daemonset.yaml b/stable/node-problem-detector/templates/daemonset.yaml index 1e3baa0b338f..d795a9ddbb23 100644 --- a/stable/node-problem-detector/templates/daemonset.yaml +++ b/stable/node-problem-detector/templates/daemonset.yaml @@ -18,7 +18,7 @@ spec: app.kubernetes.io/name: {{ include "node-problem-detector.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} annotations: - checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + checksum/config: {{ include (print $.Template.BasePath "/custom-config-configmap.yaml") . | sha256sum }} scheduler.alpha.kubernetes.io/critical-pod: '' {{- if .Values.annotations }} {{ toYaml .Values.annotations | indent 8 }} @@ -33,7 +33,7 @@ spec: command: - "/bin/sh" - "-c" - - "exec /node-problem-detector --logtostderr --system-log-monitors={{- range $index, $monitor := .Values.settings.log_monitors }}{{if ne $index 0}},{{end}}/config/{{ $monitor }}{{- end }} {{- if .Values.settings.custom_plugin_monitors }} --custom-plugin-monitors={{- range $index, $monitor := .Values.settings.custom_plugin_monitors }}{{if ne $index 0}},{{end}}{{ $monitor }}{{- end }} {{- end }}" + - "exec /node-problem-detector --logtostderr --system-log-monitors={{- range $index, $monitor := .Values.settings.log_monitors }}{{if ne $index 0}},{{end}}{{ $monitor }}{{- end }} {{- if .Values.settings.custom_plugin_monitors }} --custom-plugin-monitors={{- range $index, $monitor := .Values.settings.custom_plugin_monitors }}{{if ne $index 0}},{{end}}{{ $monitor }}{{- end }} {{- end }}" securityContext: privileged: true env: @@ -47,8 +47,8 @@ spec: - name: localtime mountPath: /etc/localtime readOnly: true - - name: config - mountPath: /config + - name: custom-config + mountPath: /custom-config readOnly: true terminationGracePeriodSeconds: 30 resources: @@ -69,11 +69,6 
@@ spec: hostPath: path: /etc/localtime type: "FileOrCreate" - - name: config + - name: custom-config configMap: - name: {{ include "node-problem-detector.fullname" . }} - items: - {{- range $index, $monitor := .Values.settings.log_monitors }} - - key: {{ $monitor }} - path: {{ $monitor }} - {{- end }} + name: {{ include "node-problem-detector.customConfig" . }} diff --git a/stable/node-problem-detector/values.yaml b/stable/node-problem-detector/values.yaml index bd08171eb5e2..fb2af3d8d81d 100644 --- a/stable/node-problem-detector/values.yaml +++ b/stable/node-problem-detector/values.yaml @@ -1,7 +1,36 @@ settings: + # Custom monitor definitions to add to Node Problem Detector - to be + # mounted at /custom-config. These are in addition to pre-packaged monitor + # definitions provided within the default docker image available at /config: + # https://github.com/kubernetes/node-problem-detector/tree/master/config + custom_monitor_definitions: {} + # docker-monitor-filelog.json: | + # { + # "plugin": "filelog", + # "pluginConfig": { + # "timestamp": "^time=\"(\\S*)\"", + # "message": "msg=\"([^\n]*)\"", + # "timestampFormat": "2006-01-02T15:04:05.999999999-07:00" + # }, + # "logPath": "/var/log/docker.log", + # "lookback": "5m", + # "bufferSize": 10, + # "source": "docker-monitor", + # "conditions": [], + # "rules": [ + # { + # "type": "temporary", + # "reason": "CorruptDockerImage", + # "pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*" + # } + # ] + # } log_monitors: - - kernel-monitor.json - - docker-monitor.json + - /config/kernel-monitor.json + - /config/docker-monitor.json + # An example of activating a custom log monitor definition in + # Node Problem Detector + # - /custom-config/docker-monitor-filelog.json custom_plugin_monitors: [] hostpath: