Skip to content

Commit

Permalink
Core: create an empty Hadoop config if not provided in constructor
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenzwu committed Oct 9, 2024
1 parent 208ab20 commit 1dfcf72
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public HadoopFileIO(SerializableSupplier<Configuration> hadoopConf) {
}

public Configuration conf() {
  // Delegate to getConf(), which lazily installs a default Hadoop
  // Configuration when none was supplied; dereferencing hadoopConf
  // directly (hadoopConf.get()) would NPE in that case.
  return getConf();
}

@Override
Expand Down Expand Up @@ -120,6 +120,12 @@ public void setConf(Configuration conf) {

@Override
public Configuration getConf() {
  // Lazily create a default hadoopConf, as it is required for the object to
  // be valid when no Configuration was supplied (e.g. via the constructor or
  // setConf). Without this, newInputFile would throw NPE on hadoopConf.get().
  // NOTE(review): this lazy init is unsynchronized; concurrent callers could
  // each install an equivalent default — presumed benign, but confirm if
  // getConf() can be reached from multiple threads.
  if (hadoopConf == null) {
    this.hadoopConf = new SerializableConfiguration(new Configuration())::get;
  }

  return hadoopConf.get();
}

Expand Down
48 changes: 48 additions & 0 deletions core/src/test/java/org/apache/iceberg/hadoop/HadoopFileIOTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.util.List;
import java.util.Random;
import java.util.UUID;
Expand All @@ -36,6 +37,7 @@
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.io.BulkDeletionFailureException;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.FileIOParser;
import org.apache.iceberg.io.ResolvingFileIO;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
Expand Down Expand Up @@ -176,6 +178,52 @@ public void testResolvingFileIOLoad() {
assertThat(result).isInstanceOf(HadoopFileIO.class);
}

@Test
public void testJsonParserWithoutHadoopConf() throws Exception {
  // A HadoopFileIO that never received an explicit Hadoop Configuration
  // must still survive a JSON round trip and remain usable afterwards.
  this.hadoopFileIO = new HadoopFileIO();

  ImmutableMap<String, String> props = ImmutableMap.of("properties-bar", "2");
  hadoopFileIO.initialize(props);
  assertThat(hadoopFileIO.properties()).containsEntry("properties-bar", "2");

  testJsonParser(hadoopFileIO, tempDir);
}

@Test
public void testJsonParserWithHadoopConf() throws Exception {
  // Same round trip as above, but with an explicit Hadoop Configuration set
  // beforehand; the parser is expected NOT to carry the conf entries over.
  this.hadoopFileIO = new HadoopFileIO();

  Configuration hadoopConf = new Configuration();
  hadoopConf.setInt("hadoop-conf-foo", 1);
  hadoopFileIO.setConf(hadoopConf);
  assertThat(hadoopFileIO.conf().get("hadoop-conf-foo")).isNotNull();

  ImmutableMap<String, String> props = ImmutableMap.of("properties-bar", "2");
  hadoopFileIO.initialize(props);
  assertThat(hadoopFileIO.properties()).containsEntry("properties-bar", "2");

  testJsonParser(hadoopFileIO, tempDir);
}

// Serializes the given FileIO to JSON, deserializes it back, and verifies the
// round-tripped instance: properties survive, Hadoop conf does not, and the
// result can still create an input file.
private static void testJsonParser(HadoopFileIO hadoopFileIO, File tempDir) throws Exception {
  String json = FileIOParser.toJson(hadoopFileIO);
  try (FileIO deserialized = FileIOParser.fromJson(json)) {
    assertThat(deserialized).isInstanceOf(HadoopFileIO.class);
    HadoopFileIO roundTripped = (HadoopFileIO) deserialized;

    // Properties are carried over during serialization and deserialization.
    assertThat(roundTripped.properties()).isEqualTo(hadoopFileIO.properties());

    // FileIOParser doesn't serialize and deserialize Hadoop configuration,
    // so config "hadoop-conf-foo" is not restored in the deserialized object.
    assertThat(roundTripped.conf().get("hadoop-conf-foo")).isNull();

    // The deserialized io must still be able to create an input file (this
    // exercises the lazily created default configuration).
    File scratch = Files.createTempDirectory(tempDir.toPath(), "junit").toFile();
    roundTripped.newInputFile(scratch.getAbsolutePath() + "/test.parquet");
  }
}

private List<Path> createRandomFiles(Path parent, int count) {
Vector<Path> paths = new Vector<>();
random
Expand Down

0 comments on commit 1dfcf72

Please sign in to comment.