Skip to content

Commit 56e910e

Browse files
authored
Merge branch 'main' into feat/bump-druid-opa-authorizer-pre-25.7.0
2 parents 93a9f84 + dbfc7fb commit 56e910e

File tree

5 files changed

+56
-53
lines changed

5 files changed

+56
-53
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,15 @@ All notable changes to this project will be documented in this file.
6969
- containerdebug updated to 0.2.0 ([#1128])
7070
- Build Hadoop as `stackable` and configure the Stackable Nexus build-repo for the `root` user ([#1133])
7171
- patchable: The base branch is now configured as the git upstream branch ([#1131]).
72+
- airflow: Updates the entrypoint script and removes the check for GID == 0 ([#1138])
7273
- druid: Bump druid-opa-authorizer to `0.7.0` ([#1139]).
7374

7475
### Fixed
7576

7677
- airflow: Pin Cython version ([#1116]).
7778
- druid: reduce docker image size by removing the recursive chown/chmods in the final image ([#1039]).
7879
- hadoop: reduce docker image size by removing the recursive chown/chmods in the final image ([#1029]).
80+
- hadoop: adapt the JMX exporter configuration to also export boolean metrics ([#1140]).
7981
- hbase: reduce docker image size by removing the recursive chown/chmods in the final image ([#1028]).
8082
- hive: reduce docker image size by removing the recursive chown/chmods in the final image ([#1040]).
8183
- kafka: reduce docker image size by removing the recursive chown/chmods in the final image ([#1041]).
@@ -159,6 +161,7 @@ All notable changes to this project will be documented in this file.
159161
[#1131]: https://github.com/stackabletech/docker-images/pull/1131
160162
[#1133]: https://github.com/stackabletech/docker-images/pull/1133
161163
[#1137]: https://github.com/stackabletech/docker-images/pull/1137
164+
[#1138]: https://github.com/stackabletech/docker-images/pull/1138
162165
[#1139]: https://github.com/stackabletech/docker-images/pull/1139
163166

164167
## [25.3.0] - 2025-03-21

airflow/stackable/utils/entrypoint.sh

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919

2020
# Stackable notes:
2121
# Source of this file is the upstream Apache Airflow project
22-
# https://github.com/apache/airflow/blob/main/scripts/docker/entrypoint_prod.sh
23-
# It was last synced from the upstream repo on 2023-07-31 and is up-to-date as of commit 86193f5
24-
22+
# https://github.com/apache/airflow/blob/dc271d0c604ca1836ee4f943726b2d436547700f/scripts/docker/entrypoint_prod.sh
23+
# It was last synced from the upstream repo on 2025-06-01 and is up-to-date as of commit dc271d0c604ca1836ee4f943726b2d436547700f
24+
# Changes we made are denoted with the comments "STACKABLE PATCH BEGIN" and "STACKABLE PATCH END"
2525

2626
AIRFLOW_COMMAND="${1:-}"
2727

@@ -34,9 +34,12 @@ set -euo pipefail
3434
# The side effect of this is slightly (in the range of 100s of milliseconds) slower load for any
3535
# binary started and a little memory used for Heap allocated by initialization of libstdc++
3636
# This overhead is not happening for binaries that already link dynamically libstdc++
37+
38+
# STACKABLE PATCH BEGIN
39+
# The path to this file is different on UBI
3740
# LD_PRELOAD="/usr/lib/$(uname -m)-linux-gnu/libstdc++.so.6"
38-
# Stackable: The path to this file is different on UBI
3941
LD_PRELOAD=/usr/lib64/libstdc++.so.6
42+
# STACKABLE PATCH END
4043
export LD_PRELOAD
4144

4245
function run_check_with_retries {
@@ -161,13 +164,17 @@ function create_www_user() {
161164
exit 1
162165
fi
163166

164-
airflow users create \
165-
--username "${_AIRFLOW_WWW_USER_USERNAME="admin"}" \
166-
--firstname "${_AIRFLOW_WWW_USER_FIRSTNAME="Airflow"}" \
167-
--lastname "${_AIRFLOW_WWW_USER_LASTNAME="Admin"}" \
168-
--email "${_AIRFLOW_WWW_USER_EMAIL="airflowadmin@example.com"}" \
169-
--role "${_AIRFLOW_WWW_USER_ROLE="Admin"}" \
170-
--password "${local_password}" || true
167+
if airflow config get-value core auth_manager | grep -q "FabAuthManager"; then
168+
airflow users create \
169+
--username "${_AIRFLOW_WWW_USER_USERNAME="admin"}" \
170+
--firstname "${_AIRFLOW_WWW_USER_FIRSTNAME="Airflow"}" \
171+
--lastname "${_AIRFLOW_WWW_USER_LASTNAME="Admin"}" \
172+
--email "${_AIRFLOW_WWW_USER_EMAIL="airflowadmin@example.com"}" \
173+
--role "${_AIRFLOW_WWW_USER_ROLE="Admin"}" \
174+
--password "${local_password}" || true
175+
else
176+
echo "Skipping user creation as auth manager different from Fab is used"
177+
fi
171178
}
172179

173180
function create_system_user_if_missing() {
@@ -193,7 +200,7 @@ function set_pythonpath_for_root_user() {
193200
# Now also adds applications installed as local user "airflow".
194201
if [[ $UID == "0" ]]; then
195202
local python_major_minor
196-
python_major_minor="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)"
203+
python_major_minor=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
197204
export PYTHONPATH="${AIRFLOW_USER_HOME_DIR}/.local/lib/python${python_major_minor}/site-packages:${PYTHONPATH:-}"
198205
>&2 echo "The container is run as root user. For security, consider using a regular user account."
199206
fi
@@ -204,9 +211,9 @@ function wait_for_airflow_db() {
204211
run_check_with_retries "airflow db check"
205212
}
206213

207-
function upgrade_db() {
208-
# Runs airflow db upgrade
209-
airflow db upgrade || true
214+
function migrate_db() {
215+
# Runs airflow db migrate
216+
airflow db migrate || true
210217
}
211218

212219
function wait_for_celery_broker() {
@@ -272,7 +279,10 @@ function check_uid_gid() {
272279
# not set when PIP is run by Airflow later on
273280
unset PIP_USER
274281

275-
check_uid_gid
282+
# STACKABLE PATCH BEGIN
283+
# Disable check for uid & gid (https://github.com/stackabletech/issues/issues/645)
284+
# check_uid_gid
285+
# STACKABLE PATCH END
276286

277287
# Set umask to 0002 to make all the directories created by the current user group-writeable
278288
# This allows the same directories to be writeable for any arbitrary user the image will be
@@ -292,8 +302,12 @@ if [[ "${CONNECTION_CHECK_MAX_COUNT}" -gt "0" ]]; then
292302
wait_for_airflow_db
293303
fi
294304

305+
if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] || [[ -n "${_AIRFLOW_DB_MIGRATE=}" ]] ; then
306+
migrate_db
307+
fi
308+
295309
if [[ -n "${_AIRFLOW_DB_UPGRADE=}" ]] ; then
296-
upgrade_db
310+
>&2 echo "WARNING: Environment variable '_AIRFLOW_DB_UPGRADE' is deprecated please use '_AIRFLOW_DB_MIGRATE' instead"
297311
fi
298312

299313
if [[ -n "${_AIRFLOW_WWW_USER_CREATE=}" ]] ; then
@@ -310,10 +324,13 @@ if [[ -n "${_PIP_ADDITIONAL_REQUIREMENTS=}" ]] ; then
310324
>&2 echo " https://airflow.apache.org/docs/docker-stack/build.html"
311325
>&2 echo
312326
>&2 echo " Adding requirements at container startup is fragile and is done every time"
313-
>&2 echo " the container starts, so it is onlny useful for testing and trying out"
327+
>&2 echo " the container starts, so it is only useful for testing and trying out"
314328
>&2 echo " of adding dependencies."
315329
>&2 echo
316-
pip install --root-user-action ignore --no-cache-dir "${_PIP_ADDITIONAL_REQUIREMENTS}"
330+
# STACKABLE PATCH BEGIN
331+
# Add double quotes to silence Shellcheck warning SC2086
332+
pip install --root-user-action ignore "${_PIP_ADDITIONAL_REQUIREMENTS}"
333+
# STACKABLE PATCH END
317334
fi
318335

319336

hadoop/stackable/jmx/datanode.yaml

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@ blacklistObjectNames:
1212
- 'Hadoop:service=DataNode,name=UgiMetrics'
1313
rules:
1414
# MetricsSystem
15-
- pattern: 'Hadoop<service=(.*), name=MetricsSystem, sub=(.*)><>(.*): (\d+)'
15+
- pattern: 'Hadoop<service=(.*), name=MetricsSystem, sub=(.*)><>(.*):'
1616
attrNameSnakeCase: true
1717
name: hadoop_$1_$3
18-
value: $4
1918
labels:
2019
service: HDFS
2120
role: $1
@@ -24,21 +23,19 @@ rules:
2423
type: GAUGE
2524
# FSDatasetState with _total suffix (also extracts the FSDataset ID),
2625
# e.g. Hadoop:name=FSDatasetState,attribute=EstimatedCapacityLostTotal
27-
- pattern: 'Hadoop<service=(.*), name=FSDatasetState-(.*)><>(.*_total): (\d+)'
26+
- pattern: 'Hadoop<service=(.*), name=FSDatasetState-(.*)><>(.*_total):'
2827
attrNameSnakeCase: true
2928
name: hadoop_$1_$3
30-
value: $4
3129
labels:
3230
service: HDFS
3331
role: $1
3432
fsdatasetid: $2
3533
kind: 'FSDatasetState'
3634
type: COUNTER
3735
# FSDatasetState (also extracts the FSDataset ID)
38-
- pattern: 'Hadoop<service=(.*), name=FSDatasetState-(.*)><>(.*): (\d+)'
36+
- pattern: 'Hadoop<service=(.*), name=FSDatasetState-(.*)><>(.*):'
3937
attrNameSnakeCase: true
4038
name: hadoop_$1_$3
41-
value: $4
4239
labels:
4340
service: HDFS
4441
role: $1
@@ -47,21 +44,19 @@ rules:
4744
type: GAUGE
4845
# DataNodeActivity with _info suffix (also extracts hostname and port),
4946
# e.g. Hadoop:name=DataNodeActivity-hdfs-datanode-default-0-9866,attribute=BlocksGetLocalPathInfo
50-
- pattern: 'Hadoop<service=(.*), name=DataNodeActivity-(.*)-(\d+)><>(.*_info): (\d+)'
47+
- pattern: 'Hadoop<service=(.*), name=DataNodeActivity-(.*)-(\d+)><>(.*_info):'
5148
attrNameSnakeCase: true
5249
name: hadoop_$1_$4_
53-
value: $5
5450
labels:
5551
service: HDFS
5652
role: $1
5753
host: $2
5854
port: $3
5955
kind: 'DataNodeActivity'
6056
type: GAUGE
61-
- pattern: 'Hadoop<service=(.*), name=DataNodeActivity-(.*)-(\d+)><>(.*): (\d+)'
57+
- pattern: 'Hadoop<service=(.*), name=DataNodeActivity-(.*)-(\d+)><>(.*):'
6258
attrNameSnakeCase: true
6359
name: hadoop_$1_$4
64-
value: $5
6560
labels:
6661
service: HDFS
6762
role: $1
@@ -70,31 +65,28 @@ rules:
7065
kind: 'DataNodeActivity'
7166
type: GAUGE
7267
# Generic counter, e.g. Hadoop:name=FSDatasetState,attribute=EstimatedCapacityLostTotal
73-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_total): (\d+)'
68+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_total):'
7469
attrNameSnakeCase: true
7570
name: hadoop_$1_$3
76-
value: $4
7771
labels:
7872
service: HDFS
7973
role: $1
8074
kind: $2
8175
type: COUNTER
8276
# Metrics suffixed with _info, e.g. Hadoop:name=JvmMetrics,attribute=LogInfo
8377
# The suffix _info is reserved for static information, therefore an underscore is appended.
84-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_info): (.*)'
78+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_info):'
8579
attrNameSnakeCase: true
8680
name: hadoop_$1_$3_
87-
value: $4
8881
labels:
8982
service: HDFS
9083
role: $1
9184
kind: $2
9285
type: GAUGE
9386
# All other Hadoop metrics
94-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*): (.*)'
87+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*):'
9588
attrNameSnakeCase: true
9689
name: hadoop_$1_$3
97-
value: $4
9890
labels:
9991
service: HDFS
10092
role: $1

hadoop/stackable/jmx/journalnode.yaml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@ blacklistObjectNames:
1313
- 'Hadoop:service=JournalNode,name=UgiMetrics'
1414
rules:
1515
# MetricsSystem
16-
- pattern: 'Hadoop<service=(.*), name=MetricsSystem, sub=(.*)><>(.*): (\d+)'
16+
- pattern: 'Hadoop<service=(.*), name=MetricsSystem, sub=(.*)><>(.*):'
1717
attrNameSnakeCase: true
1818
name: hadoop_$1_$3
19-
value: $4
2019
labels:
2120
service: HDFS
2221
role: $1
@@ -25,20 +24,18 @@ rules:
2524
type: GAUGE
2625
# Metrics suffixed with _info, e.g. Hadoop:name=JvmMetrics,attribute=LogInfo
2726
# The suffix _info is reserved for static information, therefore an underscore is appended.
28-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_info): (.*)'
27+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_info):'
2928
attrNameSnakeCase: true
3029
name: hadoop_$1_$3_
31-
value: $4
3230
labels:
3331
service: HDFS
3432
role: $1
3533
kind: $2
3634
type: GAUGE
3735
# All other Hadoop metrics
38-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*): (.*)'
36+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*):'
3937
attrNameSnakeCase: true
4038
name: hadoop_$1_$3
41-
value: $4
4239
labels:
4340
service: HDFS
4441
role: $1

hadoop/stackable/jmx/namenode.yaml

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,63 +13,57 @@ blacklistObjectNames:
1313
- 'Hadoop:service=NameNode,name=UgiMetrics'
1414
rules:
1515
# MetricsSystem
16-
- pattern: 'Hadoop<service=(.*), name=MetricsSystem, sub=(.*)><>(.*): (\d+)'
16+
- pattern: 'Hadoop<service=(.*), name=MetricsSystem, sub=(.*)><>(.*):'
1717
attrNameSnakeCase: true
1818
name: hadoop_$1_$3
19-
value: $4
2019
labels:
2120
service: HDFS
2221
role: $1
2322
kind: 'MetricsSystem'
2423
sub: $2
2524
type: GAUGE
2625
# Total raw capacity in bytes, e.g. Hadoop:name=NameNodeInfo,attribute=Total
27-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(total): (\d+)'
26+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(total):'
2827
attrNameSnakeCase: true
2928
name: hadoop_$1_$3
30-
value: $4
3129
labels:
3230
service: HDFS
3331
role: $1
3432
kind: $2
3533
type: COUNTER
3634
# Generic counter, e.g. Hadoop:name=FSNamesystem,attribute=FilesTotal
37-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_total): (\d+)'
35+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_total):'
3836
attrNameSnakeCase: true
3937
name: hadoop_$1_$3
40-
value: $4
4138
labels:
4239
service: HDFS
4340
role: $1
4441
kind: $2
4542
type: COUNTER
4643
# Metrics suffixed with _created, e.g. Hadoop:name=NameNodeActivity,attribute=FilesCreated
4744
# The suffix _created is reserved for timestamps, therefore an underscore is appended.
48-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_created): (.*)'
45+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_created):'
4946
attrNameSnakeCase: true
5047
name: hadoop_$1_$3_
51-
value: $4
5248
labels:
5349
service: HDFS
5450
role: $1
5551
kind: $2
5652
type: GAUGE
5753
# Metrics suffixed with _info, e.g. Hadoop:name=JvmMetrics,attribute=LogInfo
5854
# The suffix _info is reserved for static information, therefore an underscore is appended.
59-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_info): (.*)'
55+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*_info):'
6056
attrNameSnakeCase: true
6157
name: hadoop_$1_$3_
62-
value: $4
6358
labels:
6459
service: HDFS
6560
role: $1
6661
kind: $2
6762
type: GAUGE
6863
# All other Hadoop metrics
69-
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*): (.*)'
64+
- pattern: 'Hadoop<service=(.*), name=(.*)><>(.*):'
7065
attrNameSnakeCase: true
7166
name: hadoop_$1_$3
72-
value: $4
7367
labels:
7468
service: HDFS
7569
role: $1

0 commit comments

Comments
 (0)