Skip to content

Commit

Permalink
[BugFix] Fix unknown error when query iceberg table with array type (S…
Browse files Browse the repository at this point in the history
…tarRocks#11748)

[BugFix] Fix unknown error when querying an Iceberg table with an array type
  • Loading branch information
Youngwb authored Sep 28, 2022
1 parent 61567f8 commit 69efde3
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ private static IcebergTable convertToSRTable(org.apache.iceberg.Table icebergTab
fullSchema, properties);
}

static Type convertColumnType(org.apache.iceberg.types.Type icebergType) {
public static Type convertColumnType(org.apache.iceberg.types.Type icebergType) {
if (icebergType == null) {
return Type.NULL;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ private List<ColumnStatistic> makeColumnStatistics(List<Expression> icebergPredi
IcebergFileStats icebergFileStats = new IcebergTableStatisticCalculator(icebergTable).
generateIcebergFileStats(icebergPredicates, columns);

Map<Integer, String> idToColumnNames = columns.stream()
.filter(column -> column.type().isPrimitiveType())
.collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.name()));
Map<Integer, String> idToColumnNames = columns.stream().
filter(column -> !IcebergUtil.convertColumnType(column.type()).isUnknown())
.collect(Collectors.toMap(Types.NestedField::fieldId, Types.NestedField::name));

double recordCount = Math.max(icebergFileStats == null ? 0 : icebergFileStats.getRecordCount(), 1);
for (Map.Entry<Integer, String> idColumn : idToColumnNames.entrySet()) {
List<ColumnRefOperator> columnList = colRefToColumnMetaMap.keySet().stream().filter(
key -> key.getName().equalsIgnoreCase(idColumn.getValue())).collect(Collectors.toList());
if (columnList == null || columnList.size() != 1) {
if (columnList.size() != 1) {
LOG.debug("This column is not required column name " + idColumn.getValue() + " column list size "
+ (columnList == null ? "null" : columnList.size()));
+ columnList.size());
continue;
}

Expand All @@ -95,17 +95,17 @@ private Statistics makeTableStatistics(List<Expression> icebergPredicates,
IcebergFileStats icebergFileStats = generateIcebergFileStats(icebergPredicates, columns);

Map<Integer, String> idToColumnNames = columns.stream()
.filter(column -> column.type().isPrimitiveType())
.collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.name()));
.filter(column -> !IcebergUtil.convertColumnType(column.type()).isUnknown())
.collect(Collectors.toMap(Types.NestedField::fieldId, Types.NestedField::name));

Statistics.Builder statisticsBuilder = Statistics.builder();
double recordCount = Math.max(icebergFileStats == null ? 0 : icebergFileStats.getRecordCount(), 1);
for (Map.Entry<Integer, String> idColumn : idToColumnNames.entrySet()) {
List<ColumnRefOperator> columnList = colRefToColumnMetaMap.keySet().stream().filter(
key -> key.getName().equalsIgnoreCase(idColumn.getValue())).collect(Collectors.toList());
if (columnList == null || columnList.size() != 1) {
if (columnList.size() != 1) {
LOG.debug("This column is not required column name " + idColumn.getValue() + " column list size "
+ (columnList == null ? "null" : columnList.size()));
+ columnList.size());
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.starrocks.catalog.Column;
import com.starrocks.catalog.Type;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.statistics.ColumnStatistic;
import com.starrocks.sql.optimizer.statistics.Statistics;
import mockit.Expectations;
import mockit.Mocked;
Expand Down Expand Up @@ -76,4 +77,37 @@ public void testMakeTableStatisticsWithStructField() {
Map<Integer, Object> result = IcebergFileStats.toMap(idToTypeMapping, bounds);
Assert.assertNotNull(result);
}

@Test
public void testMakeTableStatisticsWithArrayField(@Mocked Table iTable) {
    // Schema mixing primitive columns with an ARRAY column, so the statistics
    // calculator must cope with a non-primitive (complex) field.
    List<Types.NestedField> schemaFields = new ArrayList<>();
    schemaFields.add(Types.NestedField.of(1, false, "col1", new Types.LongType()));
    schemaFields.add(Types.NestedField.of(2, false, "col2", new Types.DateType()));
    schemaFields.add(Types.NestedField.of(3, false, "colArray",
            Types.ListType.ofOptional(4, new Types.IntegerType())));
    Schema icebergSchema = new Schema(schemaFields);

    new Expectations() {
        {
            iTable.schema();
            result = icebergSchema;

            // An empty Iceberg table either has a null snapshot, or a snapshot
            // with no data files; mock the null-snapshot case here.
            iTable.currentSnapshot();
            result = null;
        }
    };

    Map<ColumnRefOperator, Column> refToColumn = new HashMap<>();
    ColumnRefOperator bigintRef = new ColumnRefOperator(1000, Type.BIGINT, "col1", true);
    ColumnRefOperator arrayRef = new ColumnRefOperator(1001, Type.ARRAY_INT, "colArray", true);
    refToColumn.put(bigintRef, new Column("col1", Type.BIGINT));
    refToColumn.put(arrayRef, new Column("colArray", Type.ARRAY_INT));

    Statistics statistics =
            IcebergTableStatisticCalculator.getTableStatistics(null, iTable, refToColumn);
    Assert.assertNotNull(statistics);

    // The array column must yield a (possibly default/unknown) statistic rather
    // than triggering an "unknown error" during statistics calculation.
    ColumnStatistic arrayColumnStatistic = statistics.getColumnStatistic(arrayRef);
    Assert.assertNotNull(arrayColumnStatistic);
}
}

0 comments on commit 69efde3

Please sign in to comment.