Skip to content

Commit 097ef5c

Browse files
committed
Add test case to validate bloom in files
1 parent e2b1212 commit 097ef5c

File tree

2 files changed

+56
-1
lines changed

2 files changed

+56
-1
lines changed

build.gradle

+1
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,7 @@ project(':iceberg-orc') {
545545
testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
546546
testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
547547
testImplementation project(':iceberg-common')
548+
testImplementation 'org.apache.orc:orc-tools'
548549
}
549550
}
550551

orc/src/test/java/org/apache/iceberg/orc/TestBloomFilter.java

+55-1
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,24 @@
2222

2323
import java.io.File;
2424
import java.lang.reflect.Field;
25+
import java.lang.reflect.Method;
26+
import org.apache.hadoop.conf.Configuration;
27+
import org.apache.hadoop.fs.Path;
2528
import org.apache.iceberg.Files;
2629
import org.apache.iceberg.Schema;
30+
import org.apache.iceberg.data.GenericRecord;
2731
import org.apache.iceberg.data.Record;
2832
import org.apache.iceberg.data.orc.GenericOrcWriter;
2933
import org.apache.iceberg.io.FileAppender;
34+
import org.apache.iceberg.io.OutputFile;
3035
import org.apache.iceberg.types.Types;
36+
import org.apache.orc.OrcFile;
37+
import org.apache.orc.OrcProto;
38+
import org.apache.orc.Reader;
39+
import org.apache.orc.StripeInformation;
40+
import org.apache.orc.TypeDescription;
41+
import org.apache.orc.impl.OrcIndex;
42+
import org.apache.orc.impl.RecordReaderImpl;
3143
import org.apache.orc.impl.WriterImpl;
3244
import org.junit.Assert;
3345
import org.junit.Rule;
@@ -48,8 +60,9 @@ public void testWriteOption() throws Exception {
4860
File testFile = temp.newFile();
4961
Assert.assertTrue("Delete should succeed", testFile.delete());
5062

63+
OutputFile outFile = Files.localOutput(testFile);
5164
try (FileAppender<Record> writer =
52-
ORC.write(Files.localOutput(testFile))
65+
ORC.write(outFile)
5366
.createWriterFunc(GenericOrcWriter::buildWriter)
5467
.schema(DATA_SCHEMA)
5568
.set("write.orc.bloom.filter.columns", "id,name")
@@ -69,9 +82,50 @@ public void testWriteOption() throws Exception {
6982
boolean[] bloomFilterColumns = (boolean[]) bloomFilterColumnsField.get(orcWriter);
7083
double bloomFilterFpp = (double) bloomFilterFppField.get(orcWriter);
7184

85+
// Validate whether the bloom filters are set in ORC SDK or not
7286
Assert.assertTrue(bloomFilterColumns[1]);
7387
Assert.assertTrue(bloomFilterColumns[2]);
7488
Assert.assertEquals(0.04, bloomFilterFpp, 1e-15);
89+
90+
Record recordTemplate = GenericRecord.create(DATA_SCHEMA);
91+
Record record1 = recordTemplate.copy("id", 1L, "name", "foo", "price", 1.0);
92+
Record record2 = recordTemplate.copy("id", 2L, "name", "bar", "price", 2.0);
93+
writer.add(record1);
94+
writer.add(record2);
95+
}
96+
97+
Class clazzFileDump = Class.forName("org.apache.orc.tools.FileDump");
98+
Method getFormattedBloomFilters =
99+
clazzFileDump.getDeclaredMethod(
100+
"getFormattedBloomFilters",
101+
int.class,
102+
OrcIndex.class,
103+
OrcFile.WriterVersion.class,
104+
TypeDescription.Category.class,
105+
OrcProto.ColumnEncoding.class);
106+
getFormattedBloomFilters.setAccessible(true);
107+
108+
try (Reader reader =
109+
OrcFile.createReader(
110+
new Path(outFile.location()), new OrcFile.ReaderOptions(new Configuration())); ) {
111+
boolean[] readCols = new boolean[] {false, true, true, false};
112+
RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
113+
OrcIndex indices = rows.readRowIndex(0, null, readCols);
114+
StripeInformation stripe = reader.getStripes().get(0);
115+
OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
116+
117+
String bloomFilterString =
118+
(String)
119+
getFormattedBloomFilters.invoke(
120+
null,
121+
1,
122+
indices,
123+
reader.getWriterVersion(),
124+
reader.getSchema().findSubtype(1).getCategory(),
125+
footer.getColumns(1));
126+
127+
// Validate whether the bloom filters are written ORC files or not
128+
Assert.assertTrue(bloomFilterString.contains("Bloom filters for column"));
75129
}
76130
}
77131

0 commit comments

Comments
 (0)