Skip to content

Commit 468e3f9

Browse files
committed
Core: Add a util to read write partition stats
1 parent d6c8358 commit 468e3f9

File tree

5 files changed

+714
-1
lines changed

5 files changed

+714
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,361 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.iceberg;
20+
21+
import java.util.Objects;
22+
import org.apache.avro.Schema;
23+
import org.apache.avro.generic.IndexedRecord;
24+
import org.apache.iceberg.avro.AvroSchemaUtil;
25+
import org.apache.iceberg.types.Types;
26+
27+
public class PartitionEntry implements IndexedRecord {
28+
private PartitionData partitionData;
29+
private int specId;
30+
private long dataRecordCount;
31+
private int dataFileCount;
32+
private long dataFileSizeInBytes;
33+
private long posDeleteRecordCount;
34+
private int posDeleteFileCount;
35+
private long eqDeleteRecordCount;
36+
private int eqDeleteFileCount;
37+
// Optional accurate count of records in a partition after applying the delete files if any
38+
private long totalRecordCount;
39+
// Commit time of snapshot that last updated this partition
40+
private long lastUpdatedAt;
41+
// ID of snapshot that last updated this partition
42+
private long lastUpdatedSnapshotId;
43+
44+
public enum Column {
45+
PARTITION_DATA,
46+
SPEC_ID,
47+
DATA_RECORD_COUNT,
48+
DATA_FILE_COUNT,
49+
DATA_FILE_SIZE_IN_BYTES,
50+
POSITION_DELETE_RECORD_COUNT,
51+
POSITION_DELETE_FILE_COUNT,
52+
EQUALITY_DELETE_RECORD_COUNT,
53+
EQUALITY_DELETE_FILE_COUNT,
54+
TOTAL_RECORD_COUNT,
55+
LAST_UPDATED_AT,
56+
LAST_UPDATED_SNAPSHOT_ID
57+
}
58+
59+
private PartitionEntry() {}
60+
61+
public static Builder builder() {
62+
return new Builder();
63+
}
64+
65+
public PartitionData partitionData() {
66+
return partitionData;
67+
}
68+
69+
public int specId() {
70+
return specId;
71+
}
72+
73+
public long dataRecordCount() {
74+
return dataRecordCount;
75+
}
76+
77+
public int dataFileCount() {
78+
return dataFileCount;
79+
}
80+
81+
public long dataFileSizeInBytes() {
82+
return dataFileSizeInBytes;
83+
}
84+
85+
public long posDeleteRecordCount() {
86+
return posDeleteRecordCount;
87+
}
88+
89+
public int posDeleteFileCount() {
90+
return posDeleteFileCount;
91+
}
92+
93+
public long eqDeleteRecordCount() {
94+
return eqDeleteRecordCount;
95+
}
96+
97+
public int eqDeleteFileCount() {
98+
return eqDeleteFileCount;
99+
}
100+
101+
public long totalRecordCount() {
102+
return totalRecordCount;
103+
}
104+
105+
public long lastUpdatedAt() {
106+
return lastUpdatedAt;
107+
}
108+
109+
public long lastUpdatedSnapshotId() {
110+
return lastUpdatedSnapshotId;
111+
}
112+
113+
@Override
114+
public void put(int i, Object v) {
115+
switch (i) {
116+
case 0:
117+
this.partitionData = (PartitionData) v;
118+
return;
119+
case 1:
120+
this.specId = (int) v;
121+
return;
122+
case 2:
123+
this.dataRecordCount = (long) v;
124+
return;
125+
case 3:
126+
this.dataFileCount = (int) v;
127+
return;
128+
case 4:
129+
this.dataFileSizeInBytes = (long) v;
130+
return;
131+
case 5:
132+
this.posDeleteRecordCount = (long) v;
133+
return;
134+
case 6:
135+
this.posDeleteFileCount = (int) v;
136+
return;
137+
case 7:
138+
this.eqDeleteRecordCount = (long) v;
139+
return;
140+
case 8:
141+
this.eqDeleteFileCount = (int) v;
142+
return;
143+
case 9:
144+
this.totalRecordCount = (long) v;
145+
return;
146+
case 10:
147+
this.lastUpdatedAt = (long) v;
148+
return;
149+
case 11:
150+
this.lastUpdatedSnapshotId = (long) v;
151+
return;
152+
default:
153+
throw new UnsupportedOperationException("Unknown field ordinal: " + i);
154+
}
155+
}
156+
157+
@Override
158+
public Object get(int i) {
159+
switch (i) {
160+
case 0:
161+
return partitionData;
162+
case 1:
163+
return specId;
164+
case 2:
165+
return dataRecordCount;
166+
case 3:
167+
return dataFileCount;
168+
case 4:
169+
return dataFileSizeInBytes;
170+
case 5:
171+
return posDeleteRecordCount;
172+
case 6:
173+
return posDeleteFileCount;
174+
case 7:
175+
return eqDeleteRecordCount;
176+
case 8:
177+
return eqDeleteFileCount;
178+
case 9:
179+
return totalRecordCount;
180+
case 10:
181+
return lastUpdatedAt;
182+
case 11:
183+
return lastUpdatedSnapshotId;
184+
default:
185+
throw new UnsupportedOperationException("Unknown field ordinal: " + i);
186+
}
187+
}
188+
189+
@Override
190+
public Schema getSchema() {
191+
return prepareAvroSchema(partitionData.getPartitionType());
192+
}
193+
194+
@Override
195+
@SuppressWarnings("checkstyle:CyclomaticComplexity")
196+
public boolean equals(Object o) {
197+
if (this == o) {
198+
return true;
199+
} else if (!(o instanceof PartitionEntry)) {
200+
return false;
201+
}
202+
203+
PartitionEntry that = (PartitionEntry) o;
204+
return partitionData.equals(that.partitionData)
205+
&& specId == that.specId
206+
&& dataRecordCount == that.dataRecordCount
207+
&& dataFileCount == that.dataFileCount
208+
&& dataFileSizeInBytes == that.dataFileSizeInBytes
209+
&& posDeleteRecordCount == that.posDeleteRecordCount
210+
&& posDeleteFileCount == that.posDeleteFileCount
211+
&& eqDeleteRecordCount == that.eqDeleteRecordCount
212+
&& eqDeleteFileCount == that.eqDeleteFileCount
213+
&& totalRecordCount == that.totalRecordCount
214+
&& lastUpdatedAt == that.lastUpdatedAt
215+
&& lastUpdatedSnapshotId == that.lastUpdatedSnapshotId;
216+
}
217+
218+
@Override
219+
public int hashCode() {
220+
return Objects.hash(
221+
partitionData,
222+
specId,
223+
dataRecordCount,
224+
dataFileCount,
225+
dataFileSizeInBytes,
226+
posDeleteRecordCount,
227+
posDeleteFileCount,
228+
eqDeleteRecordCount,
229+
eqDeleteFileCount,
230+
totalRecordCount,
231+
lastUpdatedAt,
232+
lastUpdatedSnapshotId);
233+
}
234+
235+
public static org.apache.iceberg.Schema icebergSchema(Types.StructType partitionType) {
236+
if (partitionType.fields().isEmpty()) {
237+
throw new IllegalArgumentException("getting schema for an unpartitioned table");
238+
}
239+
240+
return new org.apache.iceberg.Schema(
241+
Types.NestedField.required(1, Column.PARTITION_DATA.name(), partitionType),
242+
Types.NestedField.required(2, Column.SPEC_ID.name(), Types.IntegerType.get()),
243+
Types.NestedField.required(3, Column.DATA_RECORD_COUNT.name(), Types.LongType.get()),
244+
Types.NestedField.required(4, Column.DATA_FILE_COUNT.name(), Types.IntegerType.get()),
245+
Types.NestedField.required(5, Column.DATA_FILE_SIZE_IN_BYTES.name(), Types.LongType.get()),
246+
Types.NestedField.optional(
247+
6, Column.POSITION_DELETE_RECORD_COUNT.name(), Types.LongType.get()),
248+
Types.NestedField.optional(
249+
7, Column.POSITION_DELETE_FILE_COUNT.name(), Types.IntegerType.get()),
250+
Types.NestedField.optional(
251+
8, Column.EQUALITY_DELETE_RECORD_COUNT.name(), Types.LongType.get()),
252+
Types.NestedField.optional(
253+
9, Column.EQUALITY_DELETE_FILE_COUNT.name(), Types.IntegerType.get()),
254+
Types.NestedField.optional(10, Column.TOTAL_RECORD_COUNT.name(), Types.LongType.get()),
255+
Types.NestedField.optional(11, Column.LAST_UPDATED_AT.name(), Types.LongType.get()),
256+
Types.NestedField.optional(
257+
12, Column.LAST_UPDATED_SNAPSHOT_ID.name(), Types.LongType.get()));
258+
}
259+
260+
private static Schema prepareAvroSchema(Types.StructType partitionType) {
261+
return AvroSchemaUtil.convert(icebergSchema(partitionType), "partitionEntry");
262+
}
263+
264+
public static class Builder {
265+
private PartitionData partitionData;
266+
private int specId;
267+
private long dataRecordCount;
268+
private int dataFileCount;
269+
private long dataFileSizeInBytes;
270+
private long posDeleteRecordCount;
271+
private int posDeleteFileCount;
272+
private long eqDeleteRecordCount;
273+
private int eqDeleteFileCount;
274+
private long totalRecordCount;
275+
private long lastUpdatedAt;
276+
private long lastUpdatedSnapshotId;
277+
278+
private Builder() {}
279+
280+
public Builder withPartitionData(PartitionData newPartitionData) {
281+
this.partitionData = newPartitionData;
282+
return this;
283+
}
284+
285+
public Builder withSpecId(int newSpecId) {
286+
this.specId = newSpecId;
287+
return this;
288+
}
289+
290+
public Builder withDataRecordCount(long newDataRecordCount) {
291+
this.dataRecordCount = newDataRecordCount;
292+
return this;
293+
}
294+
295+
public Builder withDataFileCount(int newDataFileCount) {
296+
this.dataFileCount = newDataFileCount;
297+
return this;
298+
}
299+
300+
public Builder withDataFileSizeInBytes(long newDataFileSizeInBytes) {
301+
this.dataFileSizeInBytes = newDataFileSizeInBytes;
302+
return this;
303+
}
304+
305+
public Builder withPosDeleteRecordCount(Long newPosDeleteRecordCount) {
306+
this.posDeleteRecordCount = newPosDeleteRecordCount;
307+
return this;
308+
}
309+
310+
public Builder withPosDeleteFileCount(Integer newPosDeleteFileCount) {
311+
this.posDeleteFileCount = newPosDeleteFileCount;
312+
return this;
313+
}
314+
315+
public Builder withEqDeleteRecordCount(Long newEqDeleteRecordCount) {
316+
this.eqDeleteRecordCount = newEqDeleteRecordCount;
317+
return this;
318+
}
319+
320+
public Builder withEqDeleteFileCount(Integer newEqDeleteFileCount) {
321+
this.eqDeleteFileCount = newEqDeleteFileCount;
322+
return this;
323+
}
324+
325+
public Builder withTotalRecordCount(Long newTotalRecordCount) {
326+
this.totalRecordCount = newTotalRecordCount;
327+
return this;
328+
}
329+
330+
public Builder withLastUpdatedAt(Long newLastUpdatedAt) {
331+
this.lastUpdatedAt = newLastUpdatedAt;
332+
return this;
333+
}
334+
335+
public Builder withLastUpdatedSnapshotId(Long newLastUpdatedSnapshotId) {
336+
this.lastUpdatedSnapshotId = newLastUpdatedSnapshotId;
337+
return this;
338+
}
339+
340+
public PartitionEntry newInstance() {
341+
return new PartitionEntry();
342+
}
343+
344+
public PartitionEntry build() {
345+
PartitionEntry partition = new PartitionEntry();
346+
partition.partitionData = partitionData;
347+
partition.specId = specId;
348+
partition.dataRecordCount = dataRecordCount;
349+
partition.dataFileCount = dataFileCount;
350+
partition.dataFileSizeInBytes = dataFileSizeInBytes;
351+
partition.posDeleteRecordCount = posDeleteRecordCount;
352+
partition.posDeleteFileCount = posDeleteFileCount;
353+
partition.eqDeleteRecordCount = eqDeleteRecordCount;
354+
partition.eqDeleteFileCount = eqDeleteFileCount;
355+
partition.totalRecordCount = totalRecordCount;
356+
partition.lastUpdatedAt = lastUpdatedAt;
357+
partition.lastUpdatedSnapshotId = lastUpdatedSnapshotId;
358+
return partition;
359+
}
360+
}
361+
}

core/src/main/java/org/apache/iceberg/Partitioning.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,10 @@ public static StructType groupingKeyType(Schema schema, Collection<PartitionSpec
238238
* @return the constructed unified partition type
239239
*/
240240
public static StructType partitionType(Table table) {
241-
Collection<PartitionSpec> specs = table.specs().values();
241+
return partitionType(table.specs().values());
242+
}
243+
244+
public static StructType partitionType(Collection<PartitionSpec> specs) {
242245
return buildPartitionProjectionType("table partition", specs, allFieldIds(specs));
243246
}
244247

0 commit comments

Comments
 (0)