Skip to content

Commit cc57146

Browse files
authored
feat: connect client image includes JupyterLab (#1071)
* feat: install demo dependencies
* spark-connect-client is now built directly off of spark-k8s
* run pre-commit hooks
* fix shellcheck sc2102
1 parent 2a79d33 commit cc57146

File tree

3 files changed

+1848
-22
lines changed

3 files changed

+1848
-22
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ All notable changes to this project will be documented in this file.
3636

3737
- ubi-rust-builder: Bump Rust toolchain to 1.85.0, cargo-cyclonedx to 0.5.7, and cargo-auditable to 0.6.6 ([#1050]).
3838
- spark-k8s: Include spark-connect jars. Replace OpenJDK with Temurin JDK. Cleanup. ([#1034])
39+
- spark-connect-client: Image is now completely based on spark-k8s and includes JupyterLab and other demo dependencies ([#1071])
3940

4041
### Fixed
4142

spark-connect-client/Dockerfile

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
# spark-builder: provides client libs for spark-connect
44
FROM stackable/image/spark-k8s AS spark-builder
55

6-
FROM stackable/image/java-base
7-
86
ARG PRODUCT
97
ARG PYTHON
108
ARG RELEASE
@@ -18,42 +16,36 @@ LABEL name="Stackable Spark Connect Examples" \
1816
summary="Spark Connect Examples" \
1917
description="Spark Connect client libraries for Python and the JVM, including some examples."
2018

19+
# Switch to root: installing the python setuptools package with microdnf below requires root privileges.
20+
USER root
2121

22-
ENV HOME=/stackable
23-
24-
COPY spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
25-
COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-builder /stackable/spark/connect /stackable/spark/connect
22+
COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/spark-connect-examples /stackable/spark-connect-examples
23+
COPY --chown=${STACKABLE_USER_UID}:0 spark-connect-client/stackable/.jupyter /stackable/.jupyter
2624

2725
RUN <<EOF
2826
microdnf update
2927
# python{version}-setuptools: needed to build the pyspark[connect] package
3028
microdnf install --nodocs \
31-
"python${PYTHON}" \
32-
"python${PYTHON}-pip" \
3329
"python${PYTHON}-setuptools"
3430
microdnf clean all
3531
rm -rf /var/cache/yum
3632

37-
ln -s /usr/bin/python${PYTHON} /usr/bin/python
38-
ln -s /usr/bin/pip-${PYTHON} /usr/bin/pip
39-
40-
# Install python libraries for the spark connect client
41-
# shellcheck disable=SC2102
42-
pip install --no-cache-dir pyspark[connect]==${PRODUCT}
43-
4433
# All files and folders owned by root group to support running as arbitrary users.
4534
# This is best practice as all container users will belong to the root group (0).
4635
chown -R ${STACKABLE_USER_UID}:0 /stackable
4736
chmod -R g=u /stackable
4837
EOF
4938

50-
# ----------------------------------------
51-
# Attention: We are changing the group of all files in /stackable directly above
52-
# If you do any file based actions (copying / creating etc.) below this comment you
53-
# absolutely need to make sure that the correct permissions are applied!
54-
# chown ${STACKABLE_USER_UID}:0
55-
# ----------------------------------------
56-
5739
USER ${STACKABLE_USER_UID}
5840

41+
# Install python packages.
42+
# Packages are intentionally installed in "user mode" to reduce the container attack surface.
43+
# - pyspark[connect] = spark connect client libs
44+
# - jupyterlab = notebook client used in demos (scikit-learn and matplotlib are further demo dependencies)
45+
RUN pip install --no-cache-dir --user \
46+
"pyspark[connect]==${PRODUCT}" \
47+
"jupyterlab==4.4.1" \
48+
"scikit-learn==1.3.1" \
49+
"matplotlib==3.10.1"
50+
5951
WORKDIR /stackable/spark-connect-examples/python

0 commit comments

Comments
 (0)