Skip to content

fix: revert jmx exporter removal #1176

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ All notable changes to this project will be documented in this file.
- nifi: Remove `2.2.0` ([#1114]).
- kafka: Remove `3.7.1` and `3.8.0` ([#1117]).
- spark-connect-client: Remove `3.5.5` ([#1142]).
- spark-k8s: Remove the JMX exporter jar ([#1157]).

[nifi-iceberg-bundle]: https://github.com/stackabletech/nifi-iceberg-bundle
[#1025]: https://github.com/stackabletech/docker-images/pull/1025
Expand Down Expand Up @@ -185,7 +184,6 @@ All notable changes to this project will be documented in this file.
[#1151]: https://github.com/stackabletech/docker-images/pull/1151
[#1152]: https://github.com/stackabletech/docker-images/pull/1152
[#1156]: https://github.com/stackabletech/docker-images/pull/1156
[#1157]: https://github.com/stackabletech/docker-images/pull/1157
[#1163]: https://github.com/stackabletech/docker-images/pull/1163
[#1165]: https://github.com/stackabletech/docker-images/pull/1165

Expand Down
12 changes: 12 additions & 0 deletions spark-k8s/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ ARG AZURE_KEYVAULT_CORE
ARG JACKSON_DATAFORMAT_XML
ARG STAX2_API
ARG WOODSTOX_CORE
ARG JMX_EXPORTER
ARG TARGETARCH
ARG TINI
ARG STACKABLE_USER_UID
Expand Down Expand Up @@ -205,6 +206,8 @@ RUN cp /stackable/spark-${PRODUCT}/connector/connect/server/target/spark-connect
&& cp /stackable/spark-${PRODUCT}/connector/connect/common/target/spark-connect-common_*-${PRODUCT}.jar . \
&& cp /stackable/spark-${PRODUCT}/connector/connect/client/jvm/target/spark-connect-client-jvm_2.12-${PRODUCT}.jar .

COPY spark-k8s/stackable/jmx /stackable/jmx

WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars

RUN <<EOF
Expand All @@ -221,8 +224,14 @@ curl --fail "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-$
-o /usr/bin/tini
chmod +x /usr/bin/tini

# JMX Exporter
curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" \
-o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar

chmod -R g=u /stackable/spark-${PRODUCT}/dist
chmod -R g=u /stackable/spark-${PRODUCT}/assembly/target/bom.json
chmod -R g=u /stackable/jmx
EOF

# TODO: java-base installs the Adoptium dnf repo and the Temurin jre which is not needed here.
Expand All @@ -232,6 +241,7 @@ FROM stackable/image/java-base AS final
ARG PRODUCT
ARG PYTHON
ARG RELEASE
ARG JMX_EXPORTER
ARG HBASE_CONNECTOR
ARG STACKABLE_USER_UID

Expand All @@ -257,6 +267,7 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRO
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder /stackable/spark-${PRODUCT}-src.tar.gz /stackable
COPY --chown=${STACKABLE_USER_UID}:0 --from=hbase-connectors-builder /stackable/hbase-connector-${HBASE_CONNECTOR}-src.tar.gz /stackable
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark-${PRODUCT}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}.cdx.json
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/jmx /stackable/jmx
COPY --from=spark-builder /usr/bin/tini /usr/bin/tini

COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/stackable/run-spark.sh /stackable/run-spark.sh
Expand Down Expand Up @@ -290,6 +301,7 @@ chown -h ${STACKABLE_USER_UID}:0 /stackable/spark/examples/jars/spark-examples.j

# fix permissions
chmod g=u /stackable/spark
chmod g=u /stackable/jmx
chmod g=u /stackable/run-spark.sh
EOF

Expand Down
132 changes: 132 additions & 0 deletions spark-k8s/stackable/jmx/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
---
# Prometheus JMX-exporter relabeling rules for Apache Spark.
# Maps Spark's JMX metric names (master, worker, driver, executor sources)
# onto stable Prometheus names: spark_master_*, spark_worker_*,
# spark_driver_*, spark_executor_*.
# NOTE: jmx_exporter applies the FIRST matching rule, so rule order is
# significant — do not reorder.
rules:
  # These come from the master
  # Example: master.aliveWorkers
  - pattern: "metrics<name=master\\.(.*), type=counters><>Value"
    name: spark_master_$1

  # These come from the worker
  # Example: worker.coresFree
  - pattern: "metrics<name=worker\\.(.*), type=counters><>Value"
    name: spark_worker_$1

  # These come from the application driver
  # Example: app-20160809000059-0000.driver.DAGScheduler.stage.failedStages
  - pattern: "metrics<name=(.*)\\.driver\\.(DAGScheduler|BlockManager|jvm)\\.(.*), type=gauges><>Value"
    name: spark_driver_$2_$3
    type: GAUGE
    labels:
      app_id: "$1"

  # These come from the application driver
  # Emulate timers for DAGScheduler like messageProcessingTime
  - pattern: "metrics<name=(.*)\\.driver\\.DAGScheduler\\.(.*), type=counters><>Count"
    name: spark_driver_DAGScheduler_$2_total
    type: COUNTER
    labels:
      app_id: "$1"

  - pattern: "metrics<name=(.*)\\.driver\\.HiveExternalCatalog\\.(.*), type=counters><>Count"
    name: spark_driver_HiveExternalCatalog_$2_total
    type: COUNTER
    labels:
      app_id: "$1"

  # These come from the application driver
  # Emulate histograms for CodeGenerator
  - pattern: "metrics<name=(.*)\\.driver\\.CodeGenerator\\.(.*), type=counters><>Count"
    name: spark_driver_CodeGenerator_$2_total
    type: COUNTER
    labels:
      app_id: "$1"

  # These come from the application driver
  # Emulate timer (keep only count attribute) plus counters for LiveListenerBus
  - pattern: "metrics<name=(.*)\\.driver\\.LiveListenerBus\\.(.*), type=counters><>Count"
    name: spark_driver_LiveListenerBus_$2_total
    type: COUNTER
    labels:
      app_id: "$1"

  # Get Gauge type metrics for LiveListenerBus
  - pattern: "metrics<name=(.*)\\.driver\\.LiveListenerBus\\.(.*), type=gauges><>Value"
    name: spark_driver_LiveListenerBus_$2
    type: GAUGE
    labels:
      app_id: "$1"

  # These come from the application driver if it's a streaming application
  # Example: app-20160809000059-0000.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay
  # NOTE(review): no explicit `type:` here — jmx_exporter will export this as
  # UNTYPED; confirm whether GAUGE was intended, as in the sibling rules.
  - pattern: "metrics<name=(.*)\\.driver\\.(.*)\\.StreamingMetrics\\.streaming\\.(.*), type=gauges><>Value"
    name: spark_driver_streaming_$3
    labels:
      app_id: "$1"
      app_name: "$2"

  # These come from the application driver if it's a structured streaming application
  # Example: app-20160809000059-0000.driver.spark.streaming.QueryName.inputRate-total
  - pattern: "metrics<name=(.*)\\.driver\\.spark\\.streaming\\.(.*)\\.(.*), type=gauges><>Value"
    name: spark_driver_structured_streaming_$3
    labels:
      app_id: "$1"
      query_name: "$2"

  # These come from the application executors
  # Examples:
  #   app-20160809000059-0000.0.executor.threadpool.activeTasks (value)
  #   app-20160809000059-0000.0.executor.JvmGCtime (counter)

  # filesystem metrics are declared as gauge metrics, but are actually counters
  - pattern: "metrics<name=(.*)\\.(.*)\\.executor\\.filesystem\\.(.*), type=gauges><>Value"
    name: spark_executor_filesystem_$3_total
    type: COUNTER
    labels:
      app_id: "$1"
      executor_id: "$2"

  - pattern: "metrics<name=(.*)\\.(.*)\\.executor\\.(.*), type=gauges><>Value"
    name: spark_executor_$3
    type: GAUGE
    labels:
      app_id: "$1"
      executor_id: "$2"

  - pattern: "metrics<name=(.*)\\.(.*)\\.executor\\.(.*), type=counters><>Count"
    name: spark_executor_$3_total
    type: COUNTER
    labels:
      app_id: "$1"
      executor_id: "$2"

  - pattern: "metrics<name=(.*)\\.(.*)\\.ExecutorMetrics\\.(.*), type=gauges><>Value"
    name: spark_executor_$3
    type: GAUGE
    labels:
      app_id: "$1"
      executor_id: "$2"

  # These come from the application executors
  # Example: app-20160809000059-0000.0.jvm.threadpool.activeTasks
  - pattern: "metrics<name=(.*)\\.([0-9]+)\\.(jvm|NettyBlockTransfer)\\.(.*), type=gauges><>Value"
    name: spark_executor_$3_$4
    type: GAUGE
    labels:
      app_id: "$1"
      executor_id: "$2"

  - pattern: "metrics<name=(.*)\\.([0-9]+)\\.HiveExternalCatalog\\.(.*), type=counters><>Count"
    name: spark_executor_HiveExternalCatalog_$3_total
    type: COUNTER
    labels:
      app_id: "$1"
      executor_id: "$2"

  # These come from the application executors (comment previously said
  # "driver" — these patterns match the executor-id metric namespace)
  # Emulate histograms for CodeGenerator
  - pattern: "metrics<name=(.*)\\.([0-9]+)\\.CodeGenerator\\.(.*), type=counters><>Count"
    name: spark_executor_CodeGenerator_$3_total
    type: COUNTER
    labels:
      app_id: "$1"
      executor_id: "$2"
2 changes: 2 additions & 0 deletions spark-k8s/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
"woodstox_core": "6.5.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
"vector": "0.47.0",
"jmx_exporter": "1.3.0",
"tini": "0.19.0",
"hbase_connector": "1.0.1",
},
Expand All @@ -30,6 +31,7 @@
"stax2_api": "4.2.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
"woodstox_core": "6.5.1", # https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2
"vector": "0.47.0",
"jmx_exporter": "1.3.0",
"tini": "0.19.0",
"hbase_connector": "1.0.1",
},
Expand Down