Skip to content

Commit

Permalink
[#3555] feat(spark-connector): support multi scala version (#3608)
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Support building the Spark connector against multiple Scala versions (2.12 and 2.13).

### Why are the changes needed?

Fix: #3555 

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
1. existing IT and UT
2. On my local machine, ran Spark SQL tests against Spark 3.4 with Scala 2.13 and
Spark 3.5 with Scala 2.13.
  • Loading branch information
FANNG1 authored and web-flow committed May 29, 2024
1 parent 02125e5 commit 06f469a
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 7 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/spark-integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ jobs:
matrix:
architecture: [linux/amd64]
java-version: [ 8, 11, 17 ]
scala-version: [ 2.12 ]
test-mode: [ embedded, deploy ]
env:
PLATFORM: ${{ matrix.architecture }}
Expand Down Expand Up @@ -91,9 +92,11 @@ jobs:
- name: Spark Integration Test
id: integrationTest
run: |
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} :spark-connector:spark-3.3:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**"
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} :spark-connector:spark-3.4:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**"
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} :spark-connector:spark-3.5:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**"
if [ "${{ matrix.scala-version }}" == "2.12" ];then
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} -PscalaVersion=${{ matrix.scala-version }} :spark-connector:spark-3.3:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**"
fi
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} -PscalaVersion=${{ matrix.scala-version }} :spark-connector:spark-3.4:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**"
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} -PscalaVersion=${{ matrix.scala-version }} :spark-connector:spark-3.5:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**"
- name: Upload integrate tests reports
uses: actions/upload-artifact@v3
Expand Down
14 changes: 11 additions & 3 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ plugins {

rootProject.name = "gravitino"

val scalaVersion: String = gradle.startParameter.projectProperties["scalaVersion"]?.toString()
?: settings.extra["defaultScalaVersion"].toString()

include("api", "common", "core", "meta", "server", "integration-test", "server-common")
include("catalogs:bundled-catalog")
include("catalogs:catalog-hive")
Expand All @@ -28,9 +31,14 @@ include(
"clients:client-python"
)
include("trino-connector")
include("spark-connector:spark-common", "spark-connector:spark-3.3", "spark-connector:spark-runtime-3.3", "spark-connector:spark-3.4", "spark-connector:spark-runtime-3.4", "spark-connector:spark-3.5", "spark-connector:spark-runtime-3.5")
project(":spark-connector:spark-3.3").projectDir = file("spark-connector/v3.3/spark")
project(":spark-connector:spark-runtime-3.3").projectDir = file("spark-connector/v3.3/spark-runtime")
include("spark-connector:spark-common")
// kyuubi hive connector doesn't support 2.13 for Spark3.3
if (scalaVersion == "2.12") {
include("spark-connector:spark-3.3", "spark-connector:spark-runtime-3.3")
project(":spark-connector:spark-3.3").projectDir = file("spark-connector/v3.3/spark")
project(":spark-connector:spark-runtime-3.3").projectDir = file("spark-connector/v3.3/spark-runtime")
}
include("spark-connector:spark-3.4", "spark-connector:spark-runtime-3.4", "spark-connector:spark-3.5", "spark-connector:spark-runtime-3.5")
project(":spark-connector:spark-3.4").projectDir = file("spark-connector/v3.4/spark")
project(":spark-connector:spark-runtime-3.4").projectDir = file("spark-connector/v3.4/spark-runtime")
project(":spark-connector:spark-3.5").projectDir = file("spark-connector/v3.5/spark")
Expand Down
3 changes: 2 additions & 1 deletion spark-connector/spark-common/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extr
val sparkVersion: String = libs.versions.spark33.get()
val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".")
val icebergVersion: String = libs.versions.iceberg4spark.get()
val kyuubiVersion: String = libs.versions.kyuubi4spark33.get()
// kyuubi hive connector for Spark 3.3 doesn't support scala 2.13
val kyuubiVersion: String = libs.versions.kyuubi4spark34.get()
val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get()
val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ public org.apache.spark.sql.connector.expressions.Transform[] toSparkTransform(
return sparkTransforms.toArray(new org.apache.spark.sql.connector.expressions.Transform[0]);
}

@SuppressWarnings("deprecation")
private static Distribution toGravitinoDistribution(BucketTransform bucketTransform) {
int bucketNum = (Integer) bucketTransform.numBuckets().value();
Expression[] expressions =
Expand All @@ -249,6 +250,7 @@ private static Distribution toGravitinoDistribution(BucketTransform bucketTransf
}

// Spark datasourceV2 doesn't support specify sort order direction, use ASCENDING as default.
@SuppressWarnings("deprecation")
private static Pair<Distribution, SortOrder[]> toGravitinoDistributionAndSortOrders(
SortedBucketTransform sortedBucketTransform) {
int bucketNum = (Integer) sortedBucketTransform.numBuckets().value();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ void testPartition(boolean supportsBucketPartition) {
});
}

@SuppressWarnings("deprecation")
@ParameterizedTest
@ValueSource(booleans = {false, true})
void testGravitinoToSparkDistributionWithoutSortOrder(boolean supportsBucketPartition) {
Expand Down Expand Up @@ -180,6 +181,7 @@ void testSparkToGravitinoDistributionWithSortOrder(boolean supportsBucketPartiti
}
}

@SuppressWarnings("deprecation")
@ParameterizedTest
@ValueSource(booleans = {false, true})
void testGravitinoToSparkDistributionWithSortOrder(boolean supportsBucketPartition) {
Expand Down

0 comments on commit 06f469a

Please sign in to comment.