Skip to content

Commit 7556aa1

Browse files
dongjoon-hyun authored and pull[bot] committed
[SPARK-44125][R] Support Java 21 in SparkR
### What changes were proposed in this pull request? This PR aims to support Java 21 in SparkR. Arrow-related issue will be fixed when we upgrade Arrow library. Also, the following JIRA is created to re-enable them even in Java 21. - SPARK-44127 Reenable `test_sparkSQL_arrow.R` in Java 21 ### Why are the changes needed? To be ready for Java 21. ### Does this PR introduce _any_ user-facing change? No, this is additional support. ### How was this patch tested? Pass the CIs and do manual tests. ``` $ java -version openjdk version "21-ea" 2023-09-19 OpenJDK Runtime Environment (build 21-ea+27-2343) OpenJDK 64-Bit Server VM (build 21-ea+27-2343, mixed mode, sharing) $ build/sbt test:package -Psparkr -Phive $ R/install-dev.sh; R/run-tests.sh ... ══ Skipped ═════════════════════════════════════════════════════════════════════ 1. createDataFrame/collect Arrow optimization ('test_sparkSQL_arrow.R:29:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 2. createDataFrame/collect Arrow optimization - many partitions (partition order test) ('test_sparkSQL_arrow.R:47:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 3. createDataFrame/collect Arrow optimization - type specification ('test_sparkSQL_arrow.R:54:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 4. dapply() Arrow optimization ('test_sparkSQL_arrow.R:79:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 5. dapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:114:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 6. dapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:144:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 7. 
gapply() Arrow optimization ('test_sparkSQL_arrow.R:154:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 8. gapply() Arrow optimization - type specification ('test_sparkSQL_arrow.R:198:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 9. gapply() Arrow optimization - type specification (date and timestamp) ('test_sparkSQL_arrow.R:231:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 10. Arrow optimization - unsupported types ('test_sparkSQL_arrow.R:243:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 11. SPARK-32478: gapply() Arrow optimization - error message for schema mismatch ('test_sparkSQL_arrow.R:255:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 12. SPARK-43789: Automatically pick the number of partitions based on Arrow batch size ('test_sparkSQL_arrow.R:265:3') - Reason: sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21") is TRUE 13. sparkJars tag in SparkContext ('test_Windows.R:22:5') - Reason: This test is only for Windows, skipped ══ DONE ════════════════════════════════════════════════════════════════════════ ... * DONE Status: 2 NOTEs See ‘/Users/dongjoon/APACHE/spark-merge/R/SparkR.Rcheck/00check.log’ for details. + popd Tests passed. ``` Closes #41680 from dongjoon-hyun/SPARK-44125. Authored-by: Dongjoon Hyun <dongjoon@apache.org> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent 37b67ae commit 7556aa1

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

R/pkg/R/client.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,10 @@ checkJavaVersion <- function() {
9393
}, javaVersionOut)
9494

9595
javaVersionStr <- strsplit(javaVersionFilter[[1]], '"', fixed = TRUE)[[1L]][2]
96-
# javaVersionStr is of the form 1.8.0_92/9.0.x/11.0.x.
97-
# We are using 8, 9, 10, 11 for sparkJavaVersion.
96+
# javaVersionStr is of the form 1.8.0_92/11.0.x/17.0.x/21-ea/21
97+
# We are using 8, 11, 17, and 21 for sparkJavaVersion.
98+
javaVersionStr <- strsplit(javaVersionStr, "-ea", fixed = TRUE)[[1L]]
99+
98100
versions <- strsplit(javaVersionStr, ".", fixed = TRUE)[[1L]]
99101
if ("1" == versions[1]) {
100102
javaVersionNum <- as.integer(versions[2])

R/pkg/tests/fulltests/test_sparkSQL_arrow.R

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ sparkSession <- sparkR.session(
2626

2727
test_that("createDataFrame/collect Arrow optimization", {
2828
skip_if_not_installed("arrow")
29+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
30+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
2931

3032
conf <- callJMethod(sparkSession, "conf")
3133
arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]]
@@ -43,12 +45,16 @@ test_that("createDataFrame/collect Arrow optimization", {
4345

4446
test_that("createDataFrame/collect Arrow optimization - many partitions (partition order test)", {
4547
skip_if_not_installed("arrow")
48+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
49+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
4650
expect_equal(collect(createDataFrame(mtcars, numPartitions = 32)),
4751
collect(createDataFrame(mtcars, numPartitions = 1)))
4852
})
4953

5054
test_that("createDataFrame/collect Arrow optimization - type specification", {
5155
skip_if_not_installed("arrow")
56+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
57+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
5258
rdf <- data.frame(list(list(a = 1,
5359
b = "a",
5460
c = TRUE,
@@ -73,6 +79,8 @@ test_that("createDataFrame/collect Arrow optimization - type specification", {
7379

7480
test_that("dapply() Arrow optimization", {
7581
skip_if_not_installed("arrow")
82+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
83+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
7684
df <- createDataFrame(mtcars)
7785

7886
conf <- callJMethod(sparkSession, "conf")
@@ -107,6 +115,8 @@ test_that("dapply() Arrow optimization", {
107115

108116
test_that("dapply() Arrow optimization - type specification", {
109117
skip_if_not_installed("arrow")
118+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
119+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
110120
# Note that regular dapply() seems not supporting date and timestamps
111121
# whereas Arrow-optimized dapply() does.
112122
rdf <- data.frame(list(list(a = 1,
@@ -136,6 +146,8 @@ test_that("dapply() Arrow optimization - type specification", {
136146

137147
test_that("dapply() Arrow optimization - type specification (date and timestamp)", {
138148
skip_if_not_installed("arrow")
149+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
150+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
139151
rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
140152
b = as.POSIXct("1990-02-24 12:34:56"))))
141153
df <- createDataFrame(rdf)
@@ -145,6 +157,8 @@ test_that("dapply() Arrow optimization - type specification (date and timestamp)
145157

146158
test_that("gapply() Arrow optimization", {
147159
skip_if_not_installed("arrow")
160+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
161+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
148162
df <- createDataFrame(mtcars)
149163

150164
conf <- callJMethod(sparkSession, "conf")
@@ -188,6 +202,8 @@ test_that("gapply() Arrow optimization", {
188202

189203
test_that("gapply() Arrow optimization - type specification", {
190204
skip_if_not_installed("arrow")
205+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
206+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
191207
# Note that regular gapply() seems not supporting date and timestamps
192208
# whereas Arrow-optimized gapply() does.
193209
rdf <- data.frame(list(list(a = 1,
@@ -220,6 +236,8 @@ test_that("gapply() Arrow optimization - type specification", {
220236

221237
test_that("gapply() Arrow optimization - type specification (date and timestamp)", {
222238
skip_if_not_installed("arrow")
239+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
240+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
223241
rdf <- data.frame(list(list(a = as.Date("1990-02-24"),
224242
b = as.POSIXct("1990-02-24 12:34:56"))))
225243
df <- createDataFrame(rdf)
@@ -231,6 +249,8 @@ test_that("gapply() Arrow optimization - type specification (date and timestamp)
231249

232250
test_that("Arrow optimization - unsupported types", {
233251
skip_if_not_installed("arrow")
252+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
253+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
234254

235255
expect_error(checkSchemaInArrow(structType("a FLOAT")), "not support float type")
236256
expect_error(checkSchemaInArrow(structType("a BINARY")), "not support binary type")
@@ -242,6 +262,8 @@ test_that("Arrow optimization - unsupported types", {
242262

243263
test_that("SPARK-32478: gapply() Arrow optimization - error message for schema mismatch", {
244264
skip_if_not_installed("arrow")
265+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
266+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
245267
df <- createDataFrame(list(list(a = 1L, b = "a")))
246268

247269
expect_error(
@@ -251,6 +273,8 @@ test_that("SPARK-32478: gapply() Arrow optimization - error message for schema m
251273

252274
test_that("SPARK-43789: Automatically pick the number of partitions based on Arrow batch size", {
253275
skip_if_not_installed("arrow")
276+
# TODO(SPARK-44127) Reenable test_sparkSQL_arrow.R in Java 21
277+
skip_if(sparkR.callJStatic("org.apache.spark.util.Utils", "isJavaVersionAtLeast21"))
254278

255279
conf <- callJMethod(sparkSession, "conf")
256280
maxRecordsPerBatch <- sparkR.conf("spark.sql.execution.arrow.maxRecordsPerBatch")[[1]]

0 commit comments

Comments (0)