 import java.io.File;
 import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.Files;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.data.GenericRecord;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.data.orc.GenericOrcWriter;
 import org.apache.iceberg.io.FileAppender;
+import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.types.Types;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcProto;
+import org.apache.orc.Reader;
+import org.apache.orc.StripeInformation;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.OrcIndex;
+import org.apache.orc.impl.RecordReaderImpl;
 import org.apache.orc.impl.WriterImpl;
 import org.junit.Assert;
 import org.junit.Rule;
@@ -48,8 +60,9 @@ public void testWriteOption() throws Exception {
     File testFile = temp.newFile();
     Assert.assertTrue("Delete should succeed", testFile.delete());
 
+    OutputFile outFile = Files.localOutput(testFile);
     try (FileAppender<Record> writer =
-        ORC.write(Files.localOutput(testFile))
+        ORC.write(outFile)
             .createWriterFunc(GenericOrcWriter::buildWriter)
             .schema(DATA_SCHEMA)
             .set("write.orc.bloom.filter.columns", "id,name")
@@ -69,9 +82,50 @@ public void testWriteOption() throws Exception {
       boolean[] bloomFilterColumns = (boolean[]) bloomFilterColumnsField.get(orcWriter);
       double bloomFilterFpp = (double) bloomFilterFppField.get(orcWriter);
 
+      // Validate that the bloom filter options were applied to the underlying ORC writer
       Assert.assertTrue(bloomFilterColumns[1]);
       Assert.assertTrue(bloomFilterColumns[2]);
       Assert.assertEquals(0.04, bloomFilterFpp, 1e-15);
+
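+      // Write a couple of records so that bloom filter streams are
+      // actually serialized when the writer closes the file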
+      Record recordTemplate = GenericRecord.create(DATA_SCHEMA);
+      Record record1 = recordTemplate.copy("id", 1L, "name", "foo", "price", 1.0);
+      Record record2 = recordTemplate.copy("id", 2L, "name", "bar", "price", 2.0);
+      writer.add(record1);
+      writer.add(record2);
+    }
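+
+    // FileDump#getFormattedBloomFilters is not publicly accessible, hence
+    // the reflective lookup and setAccessible(true) below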
+    Class<?> clazzFileDump = Class.forName("org.apache.orc.tools.FileDump");
+    Method getFormattedBloomFilters =
+        clazzFileDump.getDeclaredMethod(
+            "getFormattedBloomFilters",
+            int.class,
+            OrcIndex.class,
+            OrcFile.WriterVersion.class,
+            TypeDescription.Category.class,
+            OrcProto.ColumnEncoding.class);
+    getFormattedBloomFilters.setAccessible(true);
+
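+    // Re-open the file with the ORC reader and load the row index
+    // (including bloom filter streams) and stripe footer of the first stripe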
+    try (Reader reader =
+        OrcFile.createReader(
+            new Path(outFile.location()), new OrcFile.ReaderOptions(new Configuration()))) {
+      // ORC column 0 is the struct root; columns 1 and 2 are "id" and "name"
+      boolean[] readCols = new boolean[] {false, true, true, false};
+      RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
+      OrcIndex indices = rows.readRowIndex(0, null, readCols);
+      StripeInformation stripe = reader.getStripes().get(0);
+      OrcProto.StripeFooter footer = rows.readStripeFooter(stripe);
+
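+      // Format the bloom filter index for column 1 ("id") the way
+      // orc-tools' FileDump prints it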
+      String bloomFilterString =
+          (String)
+              getFormattedBloomFilters.invoke(
+                  null,
+                  1,
+                  indices,
+                  reader.getWriterVersion(),
+                  reader.getSchema().findSubtype(1).getCategory(),
+                  footer.getColumns(1));
+
+      // Validate whether the bloom filters were written to the ORC file or not
+      Assert.assertTrue(bloomFilterString.contains("Bloom filters for column"));
     }
   }
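For reference, DATA_SCHEMA is defined outside this diff. A minimal sketch that would be consistent with the records written above and with the bloomFilterColumns[1]/[2] assertions (ORC column 0 is the struct root, so the fields in declaration order map to ORC columns 1..3) might look like this; the field ids and types below are assumptions, not the test's actual definition:

// Hypothetical sketch of DATA_SCHEMA (assumed; not shown in this diff)
private static final Schema DATA_SCHEMA =
    new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.required(2, "name", Types.StringType.get()),
        Types.NestedField.required(3, "price", Types.DoubleType.get()));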