Skip to content

Commit 75b9266

Browse files
authored
Feature: Add pprof endpoint (#2164)
* add pprof support to the operator Controller Manager Signed-off-by: ImpSy <3097030+ImpSy@users.noreply.github.com> * add pprof support to helm chart Signed-off-by: ImpSy <3097030+ImpSy@users.noreply.github.com> --------- Signed-off-by: ImpSy <3097030+ImpSy@users.noreply.github.com>
1 parent 9f0c08a commit 75b9266

File tree

8 files changed

+134
-1
lines changed

8 files changed

+134
-1
lines changed

charts/spark-operator-chart/README.md

+3
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum
114114
| controller.sidecars | list | `[]` | Sidecar containers for controller pods. |
115115
| controller.podDisruptionBudget.enable | bool | `false` | Specifies whether to create pod disruption budget for controller. Ref: [Specifying a Disruption Budget for your Application](https://kubernetes.io/docs/tasks/run-application/configure-pdb/) |
116116
| controller.podDisruptionBudget.minAvailable | int | `1` | The number of pods that must be available. Requires `controller.replicas` to be greater than 1. |
117+
| controller.pprof.enable | bool | `false` | Specifies whether to enable pprof. |
118+
| controller.pprof.port | int | `6060` | Specifies pprof port. |
119+
| controller.pprof.portName | string | `"pprof"` | Specifies pprof service port name. |
117120
| webhook.enable | bool | `true` | Specifies whether to enable webhook. |
118121
| webhook.replicas | int | `1` | Number of replicas of webhook server. |
119122
| webhook.logLevel | string | `"info"` | Configure the verbosity of logging, can be one of `debug`, `info`, `error`. |

charts/spark-operator-chart/templates/controller/_helpers.tpl

+7
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ Create the name of the pod disruption budget to be used by controller
9797
{{ include "spark-operator.controller.name" . }}-pdb
9898
{{- end -}}
9999

100+
{{/*
101+
Create the name of the service used by controller
102+
*/}}
103+
{{- define "spark-operator.controller.serviceName" -}}
104+
{{ include "spark-operator.controller.name" . }}-svc
105+
{{- end -}}
106+
100107
{{/*
101108
Create the role policy rules for the controller in every Spark job namespace
102109
*/}}

charts/spark-operator-chart/templates/controller/deployment.yaml

+10-1
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,20 @@ spec:
9191
- --leader-election=true
9292
- --leader-election-lock-name={{ include "spark-operator.controller.leaderElectionName" . }}
9393
- --leader-election-lock-namespace={{ .Release.Namespace }}
94-
{{- if .Values.prometheus.metrics.enable }}
94+
{{- if .Values.controller.pprof.enable }}
95+
- --pprof-bind-address=:{{ .Values.controller.pprof.port }}
96+
{{- end }}
97+
{{- if or .Values.prometheus.metrics.enable .Values.controller.pprof.enable }}
9598
ports:
99+
{{- if .Values.controller.pprof.enable }}
100+
- name: {{ .Values.controller.pprof.portName | quote }}
101+
containerPort: {{ .Values.controller.pprof.port }}
102+
{{- end }}
103+
{{- if .Values.prometheus.metrics.enable }}
96104
- name: {{ .Values.prometheus.metrics.portName | quote }}
97105
containerPort: {{ .Values.prometheus.metrics.port }}
98106
{{- end }}
107+
{{- end }}
99108
{{- with .Values.controller.env }}
100109
env:
101110
{{- toYaml . | nindent 8 }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{{/*
2+
Copyright 2024 The Kubeflow authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
https://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/}}
16+
17+
{{- if .Values.controller.pprof.enable }}
18+
apiVersion: v1
19+
kind: Service
20+
metadata:
21+
name: {{ include "spark-operator.controller.serviceName" . }}
22+
labels:
23+
{{- include "spark-operator.controller.labels" . | nindent 4 }}
24+
spec:
25+
selector:
26+
{{- include "spark-operator.controller.selectorLabels" . | nindent 4 }}
27+
ports:
28+
- port: {{ .Values.controller.pprof.port }}
29+
targetPort: {{ .Values.controller.pprof.portName | quote }}
30+
name: {{ .Values.controller.pprof.portName }}
31+
{{- end }}

charts/spark-operator-chart/tests/controller/deployment_test.yaml

+26
Original file line numberDiff line numberDiff line change
@@ -567,3 +567,29 @@ tests:
567567
asserts:
568568
- failedTemplate:
569569
errorMessage: "controller.replicas must be greater than 1 to enable topology spread constraints for controller pods"
570+
571+
- it: Should contain `--pprof-bind-address` arg if `controller.pprof.enable` is set to `true`
572+
set:
573+
controller:
574+
pprof:
575+
enable: true
576+
port: 12345
577+
asserts:
578+
- contains:
579+
path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args
580+
content: --pprof-bind-address=:12345
581+
582+
- it: Should add pprof ports if `controller.pprof.enable` is set to `true`
583+
set:
584+
controller:
585+
pprof:
586+
enable: true
587+
port: 12345
588+
portName: pprof-test
589+
asserts:
590+
- contains:
591+
path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].ports
592+
content:
593+
name: pprof-test
594+
containerPort: 12345
595+
count: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#
2+
# Copyright 2024 The Kubeflow authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# https://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
suite: Test controller service
18+
19+
templates:
20+
- controller/service.yaml
21+
22+
release:
23+
name: spark-operator
24+
namespace: spark-operator
25+
26+
tests:
27+
- it: Should create the pprof service correctly
28+
set:
29+
controller:
30+
pprof:
31+
enable: true
32+
port: 12345
33+
portName: pprof-test
34+
asserts:
35+
- containsDocument:
36+
apiVersion: v1
37+
kind: Service
38+
name: spark-operator-controller-svc
39+
- equal:
40+
path: spec.ports[0]
41+
value:
42+
port: 12345
43+
targetPort: pprof-test
44+
name: pprof-test

charts/spark-operator-chart/values.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,14 @@ controller:
170170
# Requires `controller.replicas` to be greater than 1.
171171
minAvailable: 1
172172

173+
pprof:
174+
# -- Specifies whether to enable pprof.
175+
enable: false
176+
# -- Specifies pprof port.
177+
port: 6060
178+
# -- Specifies pprof service port name.
179+
portName: pprof
180+
173181
webhook:
174182
# -- Specifies whether to enable webhook.
175183
enable: true

cmd/operator/controller/start.go

+5
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ var (
101101
metricsJobStartLatencyBuckets []float64
102102

103103
healthProbeBindAddress string
104+
pprofBindAddress string
104105
secureMetrics bool
105106
enableHTTP2 bool
106107
development bool
@@ -161,6 +162,9 @@ func NewStartCommand() *cobra.Command {
161162
command.Flags().BoolVar(&secureMetrics, "secure-metrics", false, "If set the metrics endpoint is served securely")
162163
command.Flags().BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers")
163164

165+
command.Flags().StringVar(&pprofBindAddress, "pprof-bind-address", "0", "The address the pprof endpoint binds to. "+
166+
"If not set, it will be 0 in order to disable the pprof server")
167+
164168
flagSet := flag.NewFlagSet("controller", flag.ExitOnError)
165169
ctrl.RegisterFlags(flagSet)
166170
zapOptions.BindFlags(flagSet)
@@ -193,6 +197,7 @@ func start() {
193197
TLSOpts: tlsOptions,
194198
}),
195199
HealthProbeBindAddress: healthProbeBindAddress,
200+
PprofBindAddress: pprofBindAddress,
196201
LeaderElection: enableLeaderElection,
197202
LeaderElectionID: leaderElectionLockName,
198203
LeaderElectionNamespace: leaderElectionLockNamespace,

0 commit comments

Comments
 (0)