Commit 45ef127 (1 parent: 0dd66f1)
File tree: 2 files changed, +14 -4 lines changed
orc/src/main/java/org/apache/iceberg/orc
spark/v3.2/spark/src/test/java/org/apache/iceberg/spark/data
2 files changed, +14 -4 lines changed
Original file line number | Diff line number | Diff line change
37
37
import org .apache .iceberg .relocated .com .google .common .base .Preconditions ;
38
38
import org .apache .iceberg .relocated .com .google .common .collect .Lists ;
39
39
import org .apache .orc .OrcFile ;
40
- import org .apache .orc .Reader ;
41
40
import org .apache .orc .StripeInformation ;
42
41
import org .apache .orc .TypeDescription ;
43
42
import org .apache .orc .Writer ;
@@ -147,11 +146,11 @@ public long length() {
147
146
@ Override
148
147
public List <Long > splitOffsets () {
149
148
Preconditions .checkState (isClosed , "File is not yet closed" );
150
- try ( Reader reader = ORC . newFileReader ( file . toInputFile (), conf )) {
151
- List <StripeInformation > stripes = reader .getStripes ();
149
+ try {
150
+ List <StripeInformation > stripes = writer .getStripes ();
152
151
return Collections .unmodifiableList (Lists .transform (stripes , StripeInformation ::getOffset ));
153
152
} catch (IOException e ) {
154
- throw new RuntimeIOException (e , "Can't close ORC reader %s" , file .location ());
153
+ throw new RuntimeIOException (e , "Cannot receive stripe information from writer for %s" , file .location ());
155
154
}
156
155
}
157
156
Original file line number Diff line number Diff line change 22
22
23
23
import java .io .File ;
24
24
import java .io .IOException ;
25
+ import java .util .stream .Collectors ;
26
+ import org .apache .hadoop .conf .Configuration ;
27
+ import org .apache .hadoop .fs .Path ;
25
28
import org .apache .iceberg .Files ;
26
29
import org .apache .iceberg .Schema ;
27
30
import org .apache .iceberg .io .FileAppender ;
28
31
import org .apache .iceberg .orc .ORC ;
29
32
import org .apache .iceberg .types .Types ;
33
+ import org .apache .orc .OrcFile ;
34
+ import org .apache .orc .Reader ;
35
+ import org .apache .orc .StripeInformation ;
30
36
import org .apache .spark .sql .catalyst .InternalRow ;
31
37
import org .junit .Assert ;
32
38
import org .junit .Rule ;
@@ -55,5 +61,10 @@ public void splitOffsets() throws IOException {
55
61
writer .addAll (rows );
56
62
writer .close ();
57
63
Assert .assertNotNull ("Split offsets not present" , writer .splitOffsets ());
64
+ // writer offsets are the same as the ORC reader offsets
65
+ Reader reader = OrcFile .createReader (new Path (testFile .toURI ()), OrcFile .readerOptions (new Configuration ()));
66
+ Assert .assertEquals (reader .getStripes ().stream ().map (StripeInformation ::getOffset ).collect (Collectors .toList ()),
67
+ writer .splitOffsets ());
68
+ reader .close ();
58
69
}
59
70
}
You can’t perform that action at this time.
0 commit comments