
Commit 6749394

HyukjinKwon authored and attilapiros committed
[SPARK-48763][CONNECT][BUILD][FOLLOW-UP] Move Spark Connect common/server into sql directory
### What changes were proposed in this pull request?

This PR is a followup of apache#47157 that moves `connect` into `sql/connect`.

### Why are the changes needed?

The reasons are as follows:
- There was some question about keeping `connect` as a standalone top-level directory (apache#47157 (comment)).
- Technically, all Spark Connect related code has to be placed under `sql`, just like the Hive thrift server.
- The Spark Connect server is 99% SQL-dedicated code for now.
- The Spark Connect server already uses many `spark.sql` configurations, e.g., `spark.sql.connect.serverStacktrace.enabled` (see the sketch after this message).
- Spark Connect common is only for the SQL module. If other components have to be implemented, a corresponding common module has to be placed within that component's directory.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

CI in this PR should verify it.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#47579 from HyukjinKwon/SPARK-48763-followup.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
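As a hedged illustration of the configuration point above (a sketch, not part of this commit): Connect server settings already live under the `spark.sql.*` namespace, so they can be read through the regular SQL conf API. The object name and the `local[*]` master below are illustrative assumptions.

```scala
// Sketch only, not from this commit: reading a Connect server setting through
// the regular SQL conf API. Assumes a plain local SparkSession is available.
import org.apache.spark.sql.SparkSession

object ConnectConfSketch { // hypothetical name
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    // getOption returns None when the conf is unset rather than throwing.
    val stacktraceEnabled =
      spark.conf.getOption("spark.sql.connect.serverStacktrace.enabled")
    println(s"spark.sql.connect.serverStacktrace.enabled -> $stacktraceEnabled")
    spark.stop()
  }
}
```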
1 parent e7943cd commit 6749394

2,251 files changed (+45, -32 lines)


.github/labeler.yml

Lines changed: 1 addition & 1 deletion
@@ -225,7 +225,7 @@ DEPLOY:
 CONNECT:
 - changed-files:
   - any-glob-to-any-file: [
-      'connect/**/*',
+      'sql/connect/**/*',
       'connector/connect/**/*',
       'python/pyspark/sql/**/connect/**/*',
       'python/pyspark/ml/**/connect/**/*'

.github/workflows/build_and_test.yml

Lines changed: 1 addition & 1 deletion
@@ -606,7 +606,7 @@ jobs:
     - name: Breaking change detection against branch-3.5
       uses: bufbuild/buf-breaking-action@v1
       with:
-        input: connect/common/src/main
+        input: sql/connect/common/src/main
         against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
     - name: Install Python 3.9
       uses: actions/setup-python@v5

.github/workflows/build_python_connect.yml

Lines changed: 2 additions & 2 deletions
@@ -84,7 +84,7 @@ jobs:
         # Start a Spark Connect server for local
         PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
           --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-          --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+          --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

         # Remove Py4J and PySpark zipped library to make sure there is no JVM connection
         mv python/lib lib.back
@@ -104,7 +104,7 @@ jobs:
         PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
           --master "local-cluster[2, 4, 1024]" \
           --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-          --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+          --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

         # Remove Py4J and PySpark zipped library to make sure there is no JVM connection
         mv python/lib lib.back

.github/workflows/build_python_connect35.yml

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ jobs:
         # Start a Spark Connect server for local
         PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
           --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-          --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+          --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

         # Checkout to branch-3.5 to use the tests in branch-3.5.
         cd ..

.github/workflows/maven_test.yml

Lines changed: 1 addition & 1 deletion
@@ -194,7 +194,7 @@ jobs:
         if [[ "$INCLUDED_TAGS" != "" ]]; then
           ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
         elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
-          ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connect/common,connect/server test -fae
+          ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,sql/connect/common,sql/connect/server test -fae
         elif [[ "$EXCLUDED_TAGS" != "" ]]; then
           ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
         elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala

Lines changed: 3 additions & 0 deletions
@@ -308,6 +308,7 @@ class ClientE2ETestSuite
     val testDataPath = java.nio.file.Paths
       .get(
         IntegrationTestUtils.sparkHome,
+        "sql",
         "connect",
         "common",
         "src",
@@ -347,6 +348,7 @@ class ClientE2ETestSuite
     val testDataPath = java.nio.file.Paths
       .get(
         IntegrationTestUtils.sparkHome,
+        "sql",
         "connect",
         "common",
         "src",
@@ -377,6 +379,7 @@ class ClientE2ETestSuite
     val testDataPath = java.nio.file.Paths
       .get(
         IntegrationTestUtils.sparkHome,
+        "sql",
         "connect",
         "common",
         "src",

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala

Lines changed: 4 additions & 4 deletions
@@ -71,7 +71,7 @@ import org.apache.spark.util.SparkFileUtils
  * compatibility.
  *
  * Note that the plan protos are used as the input for the `ProtoToParsedPlanTestSuite` in the
- * `connect/server` module
+ * `sql/connect/server` module
  */
 // scalastyle:on
 class PlanGenerationTestSuite
@@ -88,7 +88,7 @@ class PlanGenerationTestSuite

   protected val queryFilePath: Path = commonResourcePath.resolve("query-tests/queries")

-  // A relative path to /connect/server, used by `ProtoToParsedPlanTestSuite` to run
+  // A relative path to /sql/connect/server, used by `ProtoToParsedPlanTestSuite` to run
   // with the datasource.
   protected val testDataPath: Path = java.nio.file.Paths.get(
     "../",
@@ -3325,10 +3325,10 @@ class PlanGenerationTestSuite
   /* Protobuf functions */
   // scalastyle:off line.size.limit
   // If `common.desc` needs to be updated, execute the following command to regenerate it:
-  // 1. cd connect/common/src/main/protobuf/spark/connect
+  // 1. cd sql/connect/common/src/main/protobuf/spark/connect
   // 2. protoc --include_imports --descriptor_set_out=../../../../test/resources/protobuf-tests/common.desc common.proto
   // scalastyle:on line.size.limit
-  private val testDescFilePath: String = s"${IntegrationTestUtils.sparkHome}/connect/" +
+  private val testDescFilePath: String = s"${IntegrationTestUtils.sparkHome}/sql/connect/" +
     "common/src/test/resources/protobuf-tests/common.desc"

   // TODO(SPARK-45030): Re-enable this test when all Maven test scenarios succeed and there
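To make the regeneration comment above concrete: `protoc --descriptor_set_out` writes a serialized `FileDescriptorSet`, which can be loaded back with the standard protobuf-java API. A hedged sketch follows; the `SPARK_HOME` fallback and object name are assumptions, while the path mirrors `testDescFilePath` above.

```scala
// Sketch: load the descriptor set produced by `protoc --descriptor_set_out`.
// Requires protobuf-java on the classpath; the path mirrors testDescFilePath.
import java.nio.file.{Files, Paths}
import com.google.protobuf.DescriptorProtos.FileDescriptorSet

object DescriptorSetSketch { // hypothetical name
  def main(args: Array[String]): Unit = {
    val descPath = Paths.get(sys.env.getOrElse("SPARK_HOME", "."),
      "sql", "connect", "common", "src", "test", "resources",
      "protobuf-tests", "common.desc")
    val descriptorSet = FileDescriptorSet.parseFrom(Files.newInputStream(descPath))
    // Print the .proto files captured in the descriptor set.
    descriptorSet.getFileList.forEach(fd => println(fd.getName))
  }
}
```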

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L
   private val testDataPath = Paths
     .get(
       IntegrationTestUtils.sparkHome,
+      "sql",
       "connect",
       "common",
       "src",

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/ConnectFunSuite.scala

Lines changed: 9 additions & 2 deletions
@@ -35,10 +35,17 @@ trait ConnectFunSuite extends AnyFunSuite { // scalastyle:ignore funsuite
   }

   protected def baseResourcePath: Path = {
-    getWorkspaceFilePath("connect", "client", "jvm", "src", "test", "resources").toAbsolutePath
+    getWorkspaceFilePath(
+      "sql",
+      "connect",
+      "client",
+      "jvm",
+      "src",
+      "test",
+      "resources").toAbsolutePath
   }

   protected def commonResourcePath: Path = {
-    getWorkspaceFilePath("connect", "common", "src", "test", "resources").toAbsolutePath
+    getWorkspaceFilePath("sql", "connect", "common", "src", "test", "resources").toAbsolutePath
   }
 }
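A standalone approximation of the two helpers above, under the assumption that `getWorkspaceFilePath` resolves segments against the workspace (Spark checkout) root; its real lookup logic is not shown in this diff.

```scala
// Hedged approximation: resolve path segments against an assumed workspace root.
import java.nio.file.{Path, Paths}

object ResourcePathSketch { // hypothetical name
  def workspaceFilePath(root: Path, segments: String*): Path =
    segments.foldLeft(root)(_.resolve(_))

  def main(args: Array[String]): Unit = {
    val root = Paths.get(sys.env.getOrElse("SPARK_HOME", "."))
    val base = workspaceFilePath(
      root, "sql", "connect", "client", "jvm", "src", "test", "resources").toAbsolutePath
    val common = workspaceFilePath(
      root, "sql", "connect", "common", "src", "test", "resources").toAbsolutePath
    println(s"baseResourcePath=$base\ncommonResourcePath=$common")
  }
}
```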

connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/RemoteSparkSession.scala

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ object SparkConnectServerUtils {
   private lazy val sparkConnect: java.lang.Process = {
     debug("Starting the Spark Connect Server...")
     val connectJar =
-      findJar("connect/server", "spark-connect-assembly", "spark-connect").getCanonicalPath
+      findJar("sql/connect/server", "spark-connect-assembly", "spark-connect").getCanonicalPath

     val command = Seq.newBuilder[String]
     command += "bin/spark-submit"
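A simplified, hypothetical stand-in for the `findJar` call above (the real helper lives in `IntegrationTestUtils` and is not part of this diff): scan the module's build output for a jar whose name matches one of the given prefixes.

```scala
// Hypothetical simplification of findJar: search <module>/target for a jar
// matching a prefix. A real build places jars under target/scala-<version>/,
// which this sketch deliberately ignores for brevity.
import java.io.File

object FindJarSketch { // hypothetical name
  def findAssemblyJar(moduleDir: String, prefixes: String*): Option[File] = {
    val target = new File(moduleDir, "target")
    Option(target.listFiles()).getOrElse(Array.empty[File])
      .filter(_.getName.endsWith(".jar"))
      .find(jar => prefixes.exists(jar.getName.startsWith))
  }

  def main(args: Array[String]): Unit = {
    // After this commit the server module lives under sql/connect/server.
    val connectJar =
      findAssemblyJar("sql/connect/server", "spark-connect-assembly", "spark-connect")
    println(connectJar.map(_.getCanonicalPath).getOrElse("not found"))
  }
}
```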
