Skip to content

Commit

Permalink
Merge pull request cdapio#77 from cdapio/feature/jmx-metrics-sidecar-…
Browse files Browse the repository at this point in the history
…container

[CDAP-18715] Add service running in a sidecar container that polls JMX server on localhost to collect and publish system metrics
  • Loading branch information
Arjan Singh Bal authored Jan 31, 2022
2 parents 969e6b7 + 9526d8f commit ff44316
Show file tree
Hide file tree
Showing 9 changed files with 1,998 additions and 23 deletions.
19 changes: 19 additions & 0 deletions api/v1alpha1/cdapmaster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,14 @@ type CDAPMasterSpec struct {
// To disable this service: either omit or set the field to nil
// To enable this service: set it to a pointer to a AuthenticationSpec struct (can be an empty struct)
Authentication *AuthenticationSpec `json:"authentication,omitempty"`
// SystemMetricsExporter is specification for the CDAP SystemMetricsExporter service.
// This is an optional service and may not be required for CDAP to be operational.
// To disable this service: either omit or set the field to nil
// To enable this service: set it to a pointer to a SystemMetricsExporterSpec struct (can be an empty struct).
// CDAPServiceSpec.EnableSystemMetrics field also needs to be set to true for stateful services which require
// collection of system metrics. Services which have CDAPServiceSpec.EnableSystemMetrics as nil, missing or set to false,
// will have metrics sidecar container disabled.
SystemMetricsExporter *SystemMetricExporterSpec `json:"systemMetricsExporter,omitempty"`
// SecurityContext defines the security context for all pods for all services.
SecurityContext *SecurityContext `json:"securityContext,omitempty"`
// AdditionalVolumes defines a list of additional volumes for all services.
Expand Down Expand Up @@ -136,6 +144,12 @@ type CDAPServiceSpec struct {
AdditionalVolumeMounts []corev1.VolumeMount `json:"additionalVolumeMounts,omitempty"`
// SecurityContext overrides the security context for the service pods.
SecurityContext *SecurityContext `json:"securityContext,omitempty"`
// EnableSystemMetrics is an optional field that is considered along with CDAPMasterSpec.SystemMetricsExporter
// to start a metrics collection container in statefulsets. SystemMetricsExporter is a global setting in CDAPMasterSpec.
// When SystemMetricsExporter is absent, it disables metrics collection for all stateful services.
// When SystemMetricsExporter is present, this value should also be set to true for services which require system metrics
// collection.
EnableSystemMetrics *bool `json:"enableSystemMetrics,omitempty"`
// Lifecycle is to specify Container Lifecycle hooks provided by Kubernetes for containers.
// This will not be applied to the init containers as init containers do not support lifecycle.
Lifecycle *corev1.Lifecycle `json:"lifecycle,omitempty"`
Expand Down Expand Up @@ -225,6 +239,11 @@ type SupportBundleSpec struct {
CDAPStatefulServiceSpec `json:",inline"`
}

// SystemMetricExporterSpec defines the specification for the SystemMetricsExporter service.
type SystemMetricExporterSpec struct {
CDAPServiceSpec `json:",inline"`
}

// CDAPMasterStatus defines the observed state of CDAPMaster
type CDAPMasterStatus struct {
status.Meta `json:",inline"`
Expand Down
26 changes: 26 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1,791 changes: 1,781 additions & 10 deletions config/crd/bases/cdap.cdap.io_cdapmasters.yaml

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions controllers/cdapmaster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ func ApplyDefaults(resource interface{}) {
spec.Config[confTwillSecurityWorkerSecretDiskPath] = defaultSecuritySecretPath
}

// Set the default JMX server port if not set and system metrics exporter sidecar is enabled
if _, ok := spec.Config[confJMXServerPort]; spec.SystemMetricsExporter != nil && !ok {
spec.Config[confJMXServerPort] = fmt.Sprint(defaultJMXport)
}

// Disable explore
spec.Config[confExploreEnabled] = "false"

Expand Down
14 changes: 14 additions & 0 deletions controllers/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ const (

// serviceUserInterface defines the service type for user interface
serviceUserInterface ServiceName = "UserInterface"

// serviceSystemMetricsExporter defines the service type for sidecar metrics collection service
serviceSystemMetricsExporter ServiceName = "SystemMetricsExporter"
)

const (
Expand All @@ -60,6 +63,7 @@ const (
confTwillSecurityMasterSecretDiskPath = "twill.security.master.secret.disk.path"
confTwillSecurityWorkerSecretDiskName = "twill.security.worker.secret.disk.name"
confTwillSecurityWorkerSecretDiskPath = "twill.security.worker.secret.disk.path"
confJMXServerPort = "jmx.metrics.collector.server.port"

// default values
defaultImage = "gcr.io/cdapio/cdap:latest"
Expand Down Expand Up @@ -106,6 +110,16 @@ const (
javaReservedNonHeap = int64(768 * 1024 * 1024)
javaMaxHeapSizeEnvVarName = "JAVA_HEAPMAX"

// System Metrics sidecar related
defaultJMXport = 11022
javaOptsEnvVarName = "OPTS"
// -Dcom.sun.management.jmxremote.host=localhost ensures that JMX server is bound to localhost
// and only requests from localhost can connect to it.
// -Djava.rmi.server.hostname=localhost is required for clients to use 127.0.0.1 to connect to rmi server
// instead of public IP
// TODO(CDAP-18783): Enable SSL and authentication for JMX
jmxServerOptFormat = "-Djava.rmi.server.hostname=localhost -Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.host=localhost -Dcom.sun.management.jmxremote.port=%s -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"

Bytes = int64(1)
kiloBytes = int64(1024)
megaBytes = int64(1024 * 1024)
Expand Down
66 changes: 56 additions & 10 deletions controllers/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,13 @@ func buildStatefulSets(master *v1alpha1.CDAPMaster, name string, services Servic
if ss == nil {
continue
}
env := addJavaMaxHeapEnvIfNotPresent(ss.Env, ss.Resources)
c := newContainerSpec(master, s, dataDir).setResources(ss.Resources).setEnv(env).setLifecycle(ss.Lifecycle)
if s == serviceUserInterface {
c = updateSpecForUserInterface(master, c)
}

c := serviceContainerSpec(ss, master, dataDir, s)
spec = spec.withContainer(c)
if err := addSystemMetricsServiceIfEnabled(spec, master, ss, dataDir, c); err != nil {
return nil, err
}

// Adding a label to allow NodePort service selector to find the pod
spec = spec.addLabel(labelContainerKeyPrefix+s, master.Name)

Expand Down Expand Up @@ -226,6 +227,45 @@ func buildStatefulSets(master *v1alpha1.CDAPMaster, name string, services Servic
return spec, nil
}

// addSystemMetricsServiceIfEnabled adds a sidecar container for
// SystemMetricsExporterService if enabled in service spec.
func addSystemMetricsServiceIfEnabled(stsSpec *StatefulSpec, master *v1alpha1.CDAPMaster,
service *v1alpha1.CDAPServiceSpec, dataDir string, mainContainer *ContainerSpec) error {
if master.Spec.SystemMetricsExporter == nil || service == nil {
return nil
}
if service.EnableSystemMetrics == nil {
return nil
}
if *service.EnableSystemMetrics == false {
return nil
}
ss, err := getCDAPServiceSpec(master, serviceSystemMetricsExporter)
if err != nil {
return err
}
c := serviceContainerSpec(ss, master, dataDir, serviceSystemMetricsExporter)
stsSpec = stsSpec.withContainer(c)
// add env variable to start jmx server in the main container
varAdded := false
varName := javaOptsEnvVarName
varValue := fmt.Sprintf(jmxServerOptFormat, master.Spec.Config[confJMXServerPort])
// find existing EnvVar with same name
for idx, env := range mainContainer.Env {
if env.Name != varName {
continue
}
mainContainer.Env[idx].Value += " " + varValue
varAdded = true
}
// add new env variable if variable doesn't exist
if !varAdded {
mainContainer.addEnv(varName, varValue)
varAdded = true
}
return nil
}

// Return a single single-/multi- container deployment containing a list of supplied services
func buildDeployment(master *v1alpha1.CDAPMaster, name string, services ServiceGroup, labels map[string]string, cconf, hconf, sysappconf, dataDir string) (*DeploymentSpec, error) {
objName := getObjName(master, name)
Expand Down Expand Up @@ -273,11 +313,7 @@ func buildDeployment(master *v1alpha1.CDAPMaster, name string, services ServiceG
if ss == nil {
continue
}
env := addJavaMaxHeapEnvIfNotPresent(ss.Env, ss.Resources)
c := newContainerSpec(master, s, dataDir).setResources(ss.Resources).setEnv(env).setLifecycle(ss.Lifecycle)
if s == serviceUserInterface {
c = updateSpecForUserInterface(master, c)
}
c := serviceContainerSpec(ss, master, dataDir, s)
spec = spec.withContainer(c)

// Adding a label to allow k8s service selector to easily find the pod
Expand Down Expand Up @@ -305,6 +341,16 @@ func buildDeployment(master *v1alpha1.CDAPMaster, name string, services ServiceG
return spec, nil
}

func serviceContainerSpec(ss *v1alpha1.CDAPServiceSpec,
master *v1alpha1.CDAPMaster, dataDir string, service ServiceName) *ContainerSpec {
env := addJavaMaxHeapEnvIfNotPresent(ss.Env, ss.Resources)
c := newContainerSpec(master, service, dataDir).setResources(ss.Resources).setEnv(env).setLifecycle(ss.Lifecycle)
if service == serviceUserInterface {
c = updateSpecForUserInterface(master, c)
}
return c
}

// Return a list of reconciler objects (e.g. statefulsets, deployment, NodePort service) for the given deployment plan
func buildObjectsForDeploymentPlan(spec *DeploymentPlanSpec) ([]reconciler.Object, error) {
var objs []reconciler.Object
Expand Down
17 changes: 15 additions & 2 deletions controllers/testdata/cdap_master_cr.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@
"router.bind.port": "11015",
"router.server.address": "cdap-test-router",
"security.authentication.handlerClassName": "io.cdap.cdap.security.server.BasicAuthenticationHandler",
"security.authentication.basic.realmfile": "/opt/cdap/basicauth/basicrealm"
"security.authentication.basic.realmfile": "/opt/cdap/basicauth/basicrealm",
"jmx.metrics.collector.server.port": "1234"
},
"configMapVolumes": {
"my-config-map-1": "/my/config/map/1",
Expand Down Expand Up @@ -208,7 +209,8 @@
"cpu": "100m",
"memory": "100Mi"
}
}
},
"enableSystemMetrics": true
},
"router": {
"metadata": {
Expand Down Expand Up @@ -238,6 +240,17 @@
},
"storageSize": "100Gi"
},
"systemMetricsExporter": {
"metadata": {
"creationTimestamp": null
},
"resources": {
"requests": {
"cpu": "100m",
"memory": "200Mi"
}
}
},
"securitySecret": "cdap-secret",
"serviceAccountName": "cdap",
"userInterface": {
Expand Down
2 changes: 1 addition & 1 deletion controllers/testdata/messaging.json
Original file line number Diff line number Diff line change
Expand Up @@ -344,4 +344,4 @@
"updateRevision": "cdap-test-messaging-68cf487786",
"updatedReplicas": 1
}
}
}
81 changes: 81 additions & 0 deletions controllers/testdata/runtime.json
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@
{
"name": "JAVA_HEAPMAX",
"value": "-Xmx62914560"
},
{
"name": "OPTS",
"value": "-Djava.rmi.server.hostname=localhost -Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.host=localhost -Dcom.sun.management.jmxremote.port=1234 -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
}
],
"image": "gcr.io/cloud-data-fusion-images/cloud-data-fusion:6.1.0.5",
Expand Down Expand Up @@ -143,6 +147,83 @@
"mountPath": "/mnt/test"
}
]
},
{
"args": [
"io.cdap.cdap.master.environment.k8s.SystemMetricsExporterServiceMain",
"--env=k8s"
],
"env": [
{
"name": "JAVA_HEAPMAX",
"value": "-Xmx125829120"
}
],
"image": "gcr.io/cloud-data-fusion-images/cloud-data-fusion:6.1.0.5",
"imagePullPolicy": "IfNotPresent",
"name": "systemmetricsexporter",
"resources": {
"requests": {
"cpu": "100m",
"memory": "200Mi"
}
},
"securityContext": {
"privileged": false,
"readOnlyRootFilesystem": false,
"allowPrivilegeEscalation": false
},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File",
"volumeMounts": [
{
"mountPath": "/etc/podinfo",
"name": "podinfo",
"readOnly": true
},
{
"mountPath": "/etc/cdap/conf",
"name": "cdap-conf",
"readOnly": true
},
{
"mountPath": "/etc/hadoop/conf",
"name": "hadoop-conf",
"readOnly": true
},
{
"mountPath": "/opt/cdap/master/system-app-config",
"name": "cdap-sysappconf",
"readOnly": true
},
{
"mountPath": "/data",
"name": "cdap-test-runtime-data",
"readOnly": true
},
{
"mountPath": "/etc/cdap/security",
"name": "cdap-security",
"readOnly": true
},
{
"name": "cdap-cm-vol-my-config-map-1",
"mountPath": "/my/config/map/1"
},
{
"name": "cdap-cm-vol-my-config-map-2",
"mountPath": "/my/config/map/2"
},
{
"mountPath": "/my/secret/1",
"name": "cdap-se-vol-my-secret-1"
},
{
"name": "test-volume",
"readOnly": true,
"mountPath": "/mnt/test"
}
]
}
],
"initContainers": [
Expand Down

0 comments on commit ff44316

Please sign in to comment.