Build: Run Iceberg with JDK 17 #7391

Merged
Changes from 7 commits
4 changes: 2 additions & 2 deletions .github/workflows/delta-conversion-ci.yml
@@ -59,7 +59,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
jvm: [8, 11]
jvm: [8, 11, 17]
env:
SPARK_LOCAL_IP: localhost
steps:
@@ -88,7 +88,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
jvm: [ 8, 11 ]
jvm: [8, 11, 17]
env:
SPARK_LOCAL_IP: localhost
steps:
2 changes: 1 addition & 1 deletion .github/workflows/hive-ci.yml
@@ -57,7 +57,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
jvm: [8, 11]
jvm: [8, 11, 17]
env:
SPARK_LOCAL_IP: localhost
steps:
4 changes: 2 additions & 2 deletions .github/workflows/java-ci.yml
@@ -54,7 +54,7 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
jvm: [8, 11]
jvm: [8, 11, 17]
env:
SPARK_LOCAL_IP: localhost
steps:
@@ -71,7 +71,7 @@ jobs:
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -Pquick=true -x javadoc
- run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -Pquick=true -x javadoc
- uses: actions/upload-artifact@v3
if: failure()
with:
32 changes: 31 additions & 1 deletion .github/workflows/spark-ci.yml
@@ -90,7 +90,7 @@ jobs:
strategy:
matrix:
jvm: [8, 11]
spark: ['3.2','3.3', '3.4']
spark: ['3.2','3.3','3.4']
env:
SPARK_LOCAL_IP: localhost
steps:
@@ -114,3 +114,33 @@ jobs:
name: test logs
path: |
**/build/testlogs

spark-3x-java-17-tests:
runs-on: ubuntu-22.04
strategy:
matrix:
spark: ['3.3','3.4']
scala-version: ['2.12', '2.13']
env:
SPARK_LOCAL_IP: localhost
steps:
- uses: actions/checkout@v3
- uses: actions/setup-java@v3
with:
distribution: zulu
java-version: 17
- uses: actions/cache@v3
with:
path: |
~/.gradle/caches
~/.gradle/wrapper
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
restore-keys: ${{ runner.os }}-gradle-
- run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
- run: ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala-version }} -DhiveVersions= -DflinkVersions= :iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala-version }}:check :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala-version }}:check :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala-version }}:check -Pquick=true -x javadoc
- uses: actions/upload-artifact@v3
if: failure()
with:
name: test logs
path: |
**/build/testlogs
4 changes: 2 additions & 2 deletions api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
@@ -47,10 +47,10 @@ public class ExpressionUtil {
private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?");
private static final Pattern TIMESTAMP =
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?");
Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");
Contributor:
Sorry, I forgot to ask: why is this changed from 6 to 9? Does JDK 17 change the default timestamp precision or something like that?

Contributor (Author):
Yes. On my Mac with JDK 17 the existing regex passed, but on Ubuntu with JDK 17 I was getting values like 2023-04-20T19:17:17.748170628Z. More details here: #7391 (comment). (A short illustration of the precision difference follows this file's diff.)

private static final Pattern TIMESTAMPTZ =
Pattern.compile(
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)");
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)");
static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10;
private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5;

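For context on the 6-to-9 change discussed in the thread above, here is a minimal standalone check. The two patterns are copied from the old and new TIMESTAMPTZ regexes in the diff; the class name is made up for illustration, and the nanosecond sample string is the value reported in the review thread. On Linux with JDK 17, Instant.toString() routinely carries nanosecond precision, which the old 6-digit cap rejects. The TIMESTAMP pattern (without the zone suffix) changes for the same reason.

import java.util.regex.Pattern;

public class TimestampPrecisionCheck {
  // Pattern before this PR: fractional seconds capped at 6 digits (microseconds).
  private static final Pattern OLD_TIMESTAMPTZ =
      Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)");
  // Pattern after this PR: up to 9 fractional digits (nanoseconds).
  private static final Pattern NEW_TIMESTAMPTZ =
      Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)");

  public static void main(String[] args) {
    String nanos = "2023-04-20T19:17:17.748170628Z"; // value from the review thread
    String micros = "2023-04-20T19:17:17.748170Z";

    System.out.println(OLD_TIMESTAMPTZ.matcher(nanos).matches());  // false: 9 fractional digits rejected
    System.out.println(NEW_TIMESTAMPTZ.matcher(nanos).matches());  // true
    System.out.println(NEW_TIMESTAMPTZ.matcher(micros).matches()); // true: shorter fractions still accepted
  }
}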
31 changes: 30 additions & 1 deletion build.gradle
@@ -68,10 +68,36 @@ try {

if (JavaVersion.current() == JavaVersion.VERSION_1_8) {
project.ext.jdkVersion = '8'
project.ext.extraJvmArgs = []
} else if (JavaVersion.current() == JavaVersion.VERSION_11) {
project.ext.jdkVersion = '11'
project.ext.extraJvmArgs = []
} else if (JavaVersion.current() == JavaVersion.VERSION_17) {
project.ext.jdkVersion = '17'
project.ext.extraJvmArgs = ["-XX:+IgnoreUnrecognizedVMOptions",
"--add-opens", "java.base/java.io=ALL-UNNAMED",
Contributor:
Do we need to open all these?

Contributor (Author):
I collected these flags by debugging individual unit-test failures in the test projects (Spark / Arrow / AWS (Kryo)), so this is a superset. We could define only the relevant flags per project, but since there was no harm in passing additional flags, I passed all of them in all the projects. Please let me know if that makes sense; happy to add only the required flags per project. (A short illustration of the kind of failure these flags work around follows this file's diff.)

"--add-opens", "java.base/java.lang.invoke=ALL-UNNAMED",
"--add-opens", "java.base/java.lang.reflect=ALL-UNNAMED",
"--add-opens", "java.base/java.lang=ALL-UNNAMED",
"--add-opens", "java.base/java.math=ALL-UNNAMED",
"--add-opens", "java.base/java.net=ALL-UNNAMED",
"--add-opens", "java.base/java.nio=ALL-UNNAMED",
"--add-opens", "java.base/java.text=ALL-UNNAMED",
"--add-opens", "java.base/java.time=ALL-UNNAMED",
"--add-opens", "java.base/java.util.concurrent.atomic=ALL-UNNAMED",
"--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED",
"--add-opens", "java.base/java.util.regex=ALL-UNNAMED",
"--add-opens", "java.base/java.util=ALL-UNNAMED",
"--add-opens", "java.base/jdk.internal.ref=ALL-UNNAMED",
"--add-opens", "java.base/jdk.internal.reflect=ALL-UNNAMED",
"--add-opens", "java.sql/java.sql=ALL-UNNAMED",
"--add-opens", "java.base/sun.util.calendar=ALL-UNNAMED",
"--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED",
"--add-opens", "java.base/sun.nio.cs=ALL-UNNAMED",
"--add-opens", "java.base/sun.security.action=ALL-UNNAMED",
"--add-opens", "java.base/sun.util.calendar=ALL-UNNAMED"]
Contributor:
Maybe we could provide a list of classes to open and generate the --add-opens and =ALL-UNNAMED boilerplate. But that might be over-engineering, and I am fine with it as is. Curious what other people think.

} else {
throw new GradleException("This build must be run with JDK 8 or 11 but was executed with JDK " + JavaVersion.current())
throw new GradleException("This build must be run with JDK 8 or 11 or 17 but was executed with JDK " + JavaVersion.current())
}

apply plugin: 'com.gorylenko.gradle-git-properties'
@@ -217,6 +243,8 @@ subprojects {

maxHeapSize = "1500m"

jvmArgs += project.property('extraJvmArgs')

testLogging {
events "failed"
exceptionFormat "full"
@@ -558,6 +586,7 @@ project(':iceberg-delta-lake') {
task integrationTest(type: Test) {
testClassesDirs = sourceSets.integration.output.classesDirs
classpath = sourceSets.integration.runtimeClasspath
jvmArgs += project.property('extraJvmArgs')
}
check.dependsOn integrationTest
}
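To make the --add-opens discussion above concrete, here is a small hypothetical probe (not part of the PR) showing the kind of reflective access that serializers such as Kryo perform against JDK internals. The JDK class and field names are real; AddOpensProbe itself is made up for illustration.

import java.lang.reflect.Field;
import java.nio.Buffer;
import java.nio.ByteBuffer;

public class AddOpensProbe {
  public static void main(String[] args) throws Exception {
    ByteBuffer buffer = ByteBuffer.allocateDirect(16);

    // Kryo-style serialization reflectively reads private fields of JDK classes.
    Field capacity = Buffer.class.getDeclaredField("capacity");

    // Works on JDK 8; on JDK 17 this throws InaccessibleObjectException unless the JVM
    // is started with: --add-opens java.base/java.nio=ALL-UNNAMED
    capacity.setAccessible(true);
    System.out.println("capacity = " + capacity.getInt(buffer));
  }
}

Each entry in the flag list above opens one such package, and as noted in the thread, passing flags for packages a given module never touches is harmless, which is why the same superset is applied to every project.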
4 changes: 2 additions & 2 deletions jmh.gradle
@@ -17,8 +17,8 @@
* under the License.
*/

if (jdkVersion != '8' && jdkVersion != '11') {
throw new GradleException("The JMH benchamrks must be run with JDK 8 or JDK 11")
if (jdkVersion != '8' && jdkVersion != '11' && jdkVersion != '17') {
throw new GradleException("The JMH benchamrks must be run with JDK 8 or JDK 11 or JDK 17")
}

def sparkVersions = (System.getProperty("sparkVersions") != null ? System.getProperty("sparkVersions") : System.getProperty("defaultSparkVersions")).split(",")
1 change: 1 addition & 0 deletions spark/v3.3/build.gradle
@@ -275,6 +275,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
task integrationTest(type: Test) {
description = "Test Spark3 Runtime Jar against Spark ${sparkMajorVersion}"
group = "verification"
jvmArgs += project.property('extraJvmArgs')
testClassesDirs = sourceSets.integration.output.classesDirs
classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
inputs.file(shadowJar.archiveFile.get().asFile.path)
TestMetadataTableReadableMetrics.java
@@ -26,6 +26,7 @@
import java.nio.ByteBuffer;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Files;
import org.apache.iceberg.PartitionSpec;
@@ -76,6 +77,8 @@ public class TestMetadataTableReadableMetrics extends SparkTestBaseWithCatalog {
optional(8, "fixedCol", Types.FixedType.ofLength(3)),
optional(9, "binaryCol", Types.BinaryType.get()));

private DataFile dataFile;

public TestMetadataTableReadableMetrics() {
// only SparkCatalog supports metadata table sql queries
super(SparkCatalogConfig.HIVE);
@@ -122,8 +125,7 @@ private Table createPrimitiveTable() throws IOException {
createPrimitiveRecord(
false, 2, 2L, Float.NaN, 2.0D, new BigDecimal("2.00"), "2", null, null));

DataFile dataFile =
FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records);
dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records);
table.newAppend().appendFile(dataFile).commit();
return table;
}
@@ -141,8 +143,7 @@ private void createNestedTable() throws IOException {
createNestedRecord(0L, 0.0),
createNestedRecord(1L, Double.NaN),
createNestedRecord(null, null));
DataFile dataFile =
FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records);
dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records);
table.newAppend().appendFile(dataFile).commit();
}

@@ -192,30 +193,80 @@ private GenericRecord createNestedRecord(Long longCol, Double doubleCol) {
@Test
public void testPrimitiveColumns() throws Exception {
createPrimitiveTable();
Map<Integer, Long> columnSizeStats = dataFile.columnSizes();

Object[] binaryCol =
row(
59L,
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("binaryCol").fieldId()),
4L,
2L,
null,
Base64.getDecoder().decode("1111"),
Base64.getDecoder().decode("2222"));
Object[] booleanCol = row(44L, 4L, 0L, null, false, true);
Object[] decimalCol = row(97L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
Object[] doubleCol = row(99L, 4L, 0L, 1L, 1.0D, 2.0D);
Object[] booleanCol =
row(
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("booleanCol").fieldId()),
4L,
0L,
null,
false,
true);
Object[] decimalCol =
row(
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("decimalCol").fieldId()),
4L,
1L,
null,
new BigDecimal("1.00"),
new BigDecimal("2.00"));
Object[] doubleCol =
row(
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("doubleCol").fieldId()),
4L,
0L,
1L,
1.0D,
2.0D);
Object[] fixedCol =
row(
55L,
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("fixedCol").fieldId()),
4L,
2L,
null,
Base64.getDecoder().decode("1111"),
Base64.getDecoder().decode("2222"));
Object[] floatCol = row(90L, 4L, 0L, 2L, 0f, 0f);
Object[] intCol = row(91L, 4L, 0L, null, 1, 2);
Object[] longCol = row(91L, 4L, 0L, null, 1L, 2L);
Object[] stringCol = row(99L, 4L, 0L, null, "1", "2");
Object[] floatCol =
row(
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("floatCol").fieldId()),
4L,
0L,
2L,
0f,
0f);
Object[] intCol =
row(
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("intCol").fieldId()),
4L,
0L,
null,
1,
2);
Object[] longCol =
row(
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("longCol").fieldId()),
4L,
0L,
null,
1L,
2L);
Object[] stringCol =
row(
columnSizeStats.get(PRIMITIVE_SCHEMA.findField("stringCol").fieldId()),
4L,
0L,
null,
"1",
"2");

Object[] metrics =
row(
1 change: 1 addition & 0 deletions spark/v3.4/build.gradle
@@ -275,6 +275,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
task integrationTest(type: Test) {
description = "Test Spark3 Runtime Jar against Spark ${sparkMajorVersion}"
group = "verification"
jvmArgs += project.property('extraJvmArgs')
testClassesDirs = sourceSets.integration.output.classesDirs
classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
inputs.file(shadowJar.archiveFile.get().asFile.path)