Commit 0d980b4

HyukjinKwon authored and attilapiros committed
[SPARK-48763][CONNECT][BUILD] Move connect server and common to builtin module
### What changes were proposed in this pull request?

This PR proposes to move the Spark Connect server and common modules to a built-in module.

From:

```
connector/connect/server
connector/connect/common
```

To:

```
connect/server
connect/common
```

### Why are the changes needed?

So that end users do not have to specify `--packages` when they start the Spark Connect server. The Spark Connect client remains a separate module.

This was also pointed out in apache#39928 (comment).

### Does this PR introduce _any_ user-facing change?

Yes, users don't have to specify `--packages` anymore.

### How was this patch tested?

CI in this PR should verify the changes. Also manually tested several basic commands such as:

- Maven build
- SBT build
- Running basic Scala client commands:

  ```bash
  cd connector/connect
  bin/spark-connect
  bin/spark-connect-scala-client
  ```

- Running basic PySpark client commands:

  ```bash
  bin/pyspark --remote local
  ```

- Connecting to the server launched by `./sbin/start-connect-server.sh`:

  ```bash
  ./sbin/start-connect-server.sh
  bin/pyspark --remote "sc://localhost"
  ```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#47157 from HyukjinKwon/move-connect-server-builtin.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
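To illustrate the user-facing effect, here is a minimal before/after sketch of launching the Spark Connect server; the `--packages` coordinates and version below are illustrative, not taken from this commit:

```bash
# Before: the Connect server was an optional module and had to be
# pulled in explicitly, e.g. with --packages (coordinates illustrative):
./sbin/start-connect-server.sh --packages org.apache.spark:spark-connect_2.13:4.0.0

# After: the server ships as a built-in module, so no extra flag is needed:
./sbin/start-connect-server.sh
```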
1 parent 939c2d6 commit 0d980b4

File tree

2,257 files changed: +95 −198 lines changed. (This is a large commit; most file diffs are hidden by default, and only a subset is shown below.)

.github/labeler.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -225,6 +225,7 @@ DEPLOY:
 CONNECT:
   - changed-files:
     - any-glob-to-any-file: [
+      'connect/**/*',
       'connector/connect/**/*',
       'python/pyspark/sql/**/connect/**/*',
       'python/pyspark/ml/**/connect/**/*'
```

.github/workflows/build_and_test.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -606,7 +606,7 @@ jobs:
     - name: Breaking change detection against branch-3.5
       uses: bufbuild/buf-breaking-action@v1
       with:
-        input: connector/connect/common/src/main
+        input: connect/common/src/main
         against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
     - name: Install Python 3.9
       uses: actions/setup-python@v5
```
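This workflow step guards the Connect protobuf definitions against breaking changes relative to branch-3.5. A rough local equivalent with the `buf` CLI might look like the following sketch (assuming `buf` is installed and the command is run from the repository root):

```bash
# Check the moved proto sources against the old location on branch-3.5.
buf breaking connect/common/src/main \
  --against 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
```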

.github/workflows/build_python_connect.yml

Lines changed: 2 additions & 2 deletions
```diff
@@ -84,7 +84,7 @@ jobs:
           # Start a Spark Connect server for local
           PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
             --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+            --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

           # Remove Py4J and PySpark zipped library to make sure there is no JVM connection
           mv python/lib lib.back
@@ -104,7 +104,7 @@ jobs:
           PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
             --master "local-cluster[2, 4, 1024]" \
             --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+            --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

           # Remove Py4J and PySpark zipped library to make sure there is no JVM connection
           mv python/lib lib.back
```

.github/workflows/build_python_connect35.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -87,7 +87,7 @@ jobs:
           # Start a Spark Connect server for local
           PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
             --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+            --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

           # Checkout to branch-3.5 to use the tests in branch-3.5.
           cd ..
```

.github/workflows/maven_test.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -194,7 +194,7 @@ jobs:
           if [[ "$INCLUDED_TAGS" != "" ]]; then
             ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
           elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
-            ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
+            ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connect/common,connect/server test -fae
           elif [[ "$EXCLUDED_TAGS" != "" ]]; then
             ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
           elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
```

assembly/pom.xml

Lines changed: 35 additions & 28 deletions
```diff
@@ -74,6 +74,41 @@
       <artifactId>spark-repl_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.grpc</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.google.code.gson</groupId>
+          <artifactId>gson</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>failureaccess</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-avro_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-protobuf_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>

     <!--
       Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
@@ -138,34 +173,6 @@
       </dependency>
     </dependencies>
   </profile>
-  <profile>
-    <id>connect</id>
-    <dependencies>
-      <dependency>
-        <groupId>org.apache.spark</groupId>
-        <artifactId>spark-connect_${scala.binary.version}</artifactId>
-        <version>${project.version}</version>
-        <exclusions>
-          <exclusion>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
-          </exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.spark</groupId>
-        <artifactId>spark-avro_${scala.binary.version}</artifactId>
-        <version>${project.version}</version>
-        <scope>provided</scope>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.spark</groupId>
-        <artifactId>spark-protobuf_${scala.binary.version}</artifactId>
-        <version>${project.version}</version>
-        <scope>provided</scope>
-      </dependency>
-    </dependencies>
-  </profile>
   <profile>
     <id>kubernetes</id>
     <dependencies>
```
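The net effect of this `assembly/pom.xml` change is that `spark-connect` moves from the optional `connect` Maven profile into the assembly's default dependencies. As a sketch of what this means for builds (the first command comes from the comment removed in `bin/spark-connect-shell` below):

```bash
# Before: bundling the Connect server required enabling the profile.
build/sbt -Pconnect package

# After: a plain package build bundles the Connect server by default.
build/sbt package
```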

bin/spark-connect-shell

Lines changed: 1 addition & 2 deletions
```diff
@@ -23,5 +23,4 @@ if [ -z "${SPARK_HOME}" ]; then
   source "$(dirname "$0")"/find-spark-home
 fi

-# This requires building the spark with `-Pconnect`, e,g, `build/sbt -Pconnect package`
-exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
```

connect/common/README.md

Lines changed: 5 additions & 0 deletions

connector/connect/common/pom.xml renamed to connect/common/pom.xml

Lines changed: 1 addition & 1 deletion
```diff
@@ -23,7 +23,7 @@
   <groupId>org.apache.spark</groupId>
   <artifactId>spark-parent_2.13</artifactId>
   <version>4.0.0-SNAPSHOT</version>
-  <relativePath>../../../pom.xml</relativePath>
+  <relativePath>../../pom.xml</relativePath>
 </parent>

 <artifactId>spark-connect-common_2.13</artifactId>
```
