Skip to content

Commit d73bcba

Browse files
committed
chore: various refactoring changes for iceberg
1 parent ff975c0 commit d73bcba

File tree

7 files changed

+370
-55
lines changed

7 files changed

+370
-55
lines changed

common/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ under the License.
5151
<groupId>org.apache.parquet</groupId>
5252
<artifactId>parquet-hadoop</artifactId>
5353
</dependency>
54+
<dependency>
55+
<groupId>org.apache.parquet</groupId>
56+
<artifactId>parquet-format-structures</artifactId>
57+
</dependency>
5458
<dependency>
5559
<groupId>org.apache.arrow</groupId>
5660
<artifactId>arrow-vector</artifactId>

common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ ColumnDescriptor getDescriptor() {
8989
return descriptor;
9090
}
9191

92+
String getPath() {
93+
return String.join(".", this.descriptor.getPath());
94+
}
95+
9296
/**
9397
* Set the batch size of this reader to be 'batchSize'. Also initializes the native column reader.
9498
*/
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.comet.parquet;
21+
22+
import java.util.Map;
23+
24+
import org.apache.hadoop.conf.Configuration;
25+
import org.apache.spark.sql.catalyst.InternalRow;
26+
import org.apache.spark.sql.execution.metric.SQLMetric;
27+
import org.apache.spark.sql.types.StructType;
28+
29+
/**
30+
* A specialized NativeBatchReader for Iceberg that accepts ParquetMetadata as a JSON string. This
31+
* allows Iceberg to pass metadata in serialized form with a two-step initialization pattern.
32+
*/
33+
public class IcebergCometNativeBatchReader extends NativeBatchReader {
34+
35+
public IcebergCometNativeBatchReader(StructType requiredSchema) {
36+
super();
37+
this.sparkSchema = requiredSchema;
38+
}
39+
40+
/** Initialize the reader using FileInfo instead of PartitionedFile. */
41+
public void init(
42+
Configuration conf,
43+
FileInfo fileInfo,
44+
byte[] parquetMetadataBytes,
45+
byte[] nativeFilter,
46+
int capacity,
47+
StructType dataSchema,
48+
boolean isCaseSensitive,
49+
boolean useFieldId,
50+
boolean ignoreMissingIds,
51+
boolean useLegacyDateTimestamp,
52+
StructType partitionSchema,
53+
InternalRow partitionValues,
54+
AbstractColumnReader[] preInitializedReaders,
55+
Map<String, SQLMetric> metrics)
56+
throws Throwable {
57+
58+
// Set parent fields
59+
this.conf = conf;
60+
this.fileInfo = fileInfo;
61+
this.footer = new ParquetMetadataSerializer().deserialize(parquetMetadataBytes);
62+
this.nativeFilter = nativeFilter;
63+
this.capacity = capacity;
64+
this.dataSchema = dataSchema;
65+
this.isCaseSensitive = isCaseSensitive;
66+
this.useFieldId = useFieldId;
67+
this.ignoreMissingIds = ignoreMissingIds;
68+
this.partitionSchema = partitionSchema;
69+
this.partitionValues = partitionValues;
70+
this.preInitializedReaders = preInitializedReaders;
71+
72+
// Call parent init method
73+
super.init();
74+
}
75+
76+
public StructType getSparkSchema() {
77+
return this.sparkSchema;
78+
}
79+
}

0 commit comments

Comments
 (0)