Skip to content

Commit 1797d5f

Browse files
committed
Add test case to validate bloom in files
1 parent 0b1854e commit 1797d5f

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

build.gradle

+1
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ project(':iceberg-orc') {
527527

528528
testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
529529
testImplementation project(':iceberg-common')
530+
testImplementation 'org.apache.orc:orc-tools'
530531
}
531532
}
532533

orc/src/test/java/org/apache/iceberg/orc/TestBloomFilter.java

+53-1
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,24 @@
2222

2323
import java.io.File;
2424
import java.lang.reflect.Field;
25+
import java.lang.reflect.Method;
26+
import org.apache.hadoop.conf.Configuration;
27+
import org.apache.hadoop.fs.Path;
2528
import org.apache.iceberg.Files;
2629
import org.apache.iceberg.Schema;
30+
import org.apache.iceberg.data.GenericRecord;
2731
import org.apache.iceberg.data.Record;
2832
import org.apache.iceberg.data.orc.GenericOrcWriter;
2933
import org.apache.iceberg.io.FileAppender;
34+
import org.apache.iceberg.io.OutputFile;
3035
import org.apache.iceberg.types.Types;
36+
import org.apache.orc.OrcFile;
37+
import org.apache.orc.OrcProto;
38+
import org.apache.orc.Reader;
39+
import org.apache.orc.StripeInformation;
40+
import org.apache.orc.TypeDescription;
41+
import org.apache.orc.impl.OrcIndex;
42+
import org.apache.orc.impl.RecordReaderImpl;
3143
import org.apache.orc.impl.WriterImpl;
3244
import org.junit.Assert;
3345
import org.junit.Rule;
@@ -48,14 +60,16 @@ public void testWriteOption() throws Exception {
4860
File testFile = temp.newFile();
4961
Assert.assertTrue("Delete should succeed", testFile.delete());
5062

63+
OutputFile outFile = Files.localOutput(testFile);
5164
try (FileAppender<Record> writer =
52-
ORC.write(Files.localOutput(testFile))
65+
ORC.write(outFile)
5366
.createWriterFunc(GenericOrcWriter::buildWriter)
5467
.schema(DATA_SCHEMA)
5568
.set("write.orc.bloom.filter.columns", "id,name")
5669
.set("write.orc.bloom.filter.fpp", "0.04")
5770
.build()) {
5871

72+
// Validate whether the bloom filters are set in the ORC SDK or not
5973
Class clazzOrcFileAppender = Class.forName("org.apache.iceberg.orc.OrcFileAppender");
6074
Field writerField = clazzOrcFileAppender.getDeclaredField("writer");
6175
writerField.setAccessible(true);
@@ -72,7 +86,45 @@ public void testWriteOption() throws Exception {
7286
Assert.assertTrue(bloomFilterColumns[1]);
7387
Assert.assertTrue(bloomFilterColumns[2]);
7488
Assert.assertEquals(0.04, bloomFilterFpp, 1e-15);
89+
90+
Record recordTemplate = GenericRecord.create(DATA_SCHEMA);
91+
Record record1 = recordTemplate.copy("id", 1L, "name", "foo", "price", 1.0);
92+
Record record2 = recordTemplate.copy("id", 2L, "name", "bar", "price", 2.0);
93+
writer.add(record1);
94+
writer.add(record2);
7595
}
96+
97+
// Validate whether the bloom filters are written to ORC files or not
98+
Class clazzFileDump = Class.forName("org.apache.orc.tools.FileDump");
99+
Method getFormattedBloomFilters =
100+
clazzFileDump.getDeclaredMethod(
101+
"getFormattedBloomFilters",
102+
int.class,
103+
OrcIndex.class,
104+
OrcFile.WriterVersion.class,
105+
TypeDescription.Category.class,
106+
OrcProto.ColumnEncoding.class);
107+
getFormattedBloomFilters.setAccessible(true);
108+
109+
Reader reader =
110+
OrcFile.createReader(
111+
new Path(outFile.location()), new OrcFile.ReaderOptions(new Configuration()));
112+
boolean[] readCols = new boolean[] {false, true, true, false};
113+
RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
114+
OrcIndex indices = rows.readRowIndex(0, null, readCols);
115+
StripeInformation stripe = reader.getStripes().get(0);
116+
OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
117+
String bloomFilterString =
118+
(String)
119+
getFormattedBloomFilters.invoke(
120+
null,
121+
1,
122+
indices,
123+
reader.getWriterVersion(),
124+
reader.getSchema().findSubtype(1).getCategory(),
125+
footer.getColumns(1));
126+
127+
Assert.assertTrue(bloomFilterString.contains("Bloom filters for column"));
76128
}
77129

78130
@Test

0 commit comments

Comments
 (0)