Skip to content

Commit

Permalink
[BugFix] Fix unknown error when query iceberg table with array type (S…
Browse files Browse the repository at this point in the history
…tarRocks#11748)

[BugFix] Fix unknown error when querying an Iceberg table with an array type
  • Loading branch information
Youngwb authored Sep 28, 2022
1 parent 61567f8 commit 69efde3
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ private static IcebergTable convertToSRTable(org.apache.iceberg.Table icebergTab
fullSchema, properties);
}

static Type convertColumnType(org.apache.iceberg.types.Type icebergType) {
public static Type convertColumnType(org.apache.iceberg.types.Type icebergType) {
if (icebergType == null) {
return Type.NULL;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,17 @@ private List<ColumnStatistic> makeColumnStatistics(List<Expression> icebergPredi
IcebergFileStats icebergFileStats = new IcebergTableStatisticCalculator(icebergTable).
generateIcebergFileStats(icebergPredicates, columns);

Map<Integer, String> idToColumnNames = columns.stream()
.filter(column -> column.type().isPrimitiveType())
.collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.name()));
Map<Integer, String> idToColumnNames = columns.stream().
filter(column -> !IcebergUtil.convertColumnType(column.type()).isUnknown())
.collect(Collectors.toMap(Types.NestedField::fieldId, Types.NestedField::name));

double recordCount = Math.max(icebergFileStats == null ? 0 : icebergFileStats.getRecordCount(), 1);
for (Map.Entry<Integer, String> idColumn : idToColumnNames.entrySet()) {
List<ColumnRefOperator> columnList = colRefToColumnMetaMap.keySet().stream().filter(
key -> key.getName().equalsIgnoreCase(idColumn.getValue())).collect(Collectors.toList());
if (columnList == null || columnList.size() != 1) {
if (columnList.size() != 1) {
LOG.debug("This column is not required column name " + idColumn.getValue() + " column list size "
+ (columnList == null ? "null" : columnList.size()));
+ columnList.size());
continue;
}

Expand All @@ -95,17 +95,17 @@ private Statistics makeTableStatistics(List<Expression> icebergPredicates,
IcebergFileStats icebergFileStats = generateIcebergFileStats(icebergPredicates, columns);

Map<Integer, String> idToColumnNames = columns.stream()
.filter(column -> column.type().isPrimitiveType())
.collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.name()));
.filter(column -> !IcebergUtil.convertColumnType(column.type()).isUnknown())
.collect(Collectors.toMap(Types.NestedField::fieldId, Types.NestedField::name));

Statistics.Builder statisticsBuilder = Statistics.builder();
double recordCount = Math.max(icebergFileStats == null ? 0 : icebergFileStats.getRecordCount(), 1);
for (Map.Entry<Integer, String> idColumn : idToColumnNames.entrySet()) {
List<ColumnRefOperator> columnList = colRefToColumnMetaMap.keySet().stream().filter(
key -> key.getName().equalsIgnoreCase(idColumn.getValue())).collect(Collectors.toList());
if (columnList == null || columnList.size() != 1) {
if (columnList.size() != 1) {
LOG.debug("This column is not required column name " + idColumn.getValue() + " column list size "
+ (columnList == null ? "null" : columnList.size()));
+ columnList.size());
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.starrocks.catalog.Column;
import com.starrocks.catalog.Type;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.statistics.ColumnStatistic;
import com.starrocks.sql.optimizer.statistics.Statistics;
import mockit.Expectations;
import mockit.Mocked;
Expand Down Expand Up @@ -76,4 +77,37 @@ public void testMakeTableStatisticsWithStructField() {
Map<Integer, Object> result = IcebergFileStats.toMap(idToTypeMapping, bounds);
Assert.assertNotNull(result);
}

@Test
public void testMakeTableStatisticsWithArrayField(@Mocked Table iTable) {
    // Schema mixing primitive columns with an ARRAY column, so the statistics
    // calculator must cope with a non-primitive (complex) field.
    List<Types.NestedField> schemaFields = new ArrayList<>();
    schemaFields.add(Types.NestedField.of(1, false, "col1", new Types.LongType()));
    schemaFields.add(Types.NestedField.of(2, false, "col2", new Types.DateType()));
    schemaFields.add(Types.NestedField.of(3, false, "colArray",
            Types.ListType.ofOptional(4, new Types.IntegerType())));
    Schema icebergSchema = new Schema(schemaFields);

    new Expectations() {
        {
            iTable.schema();
            result = icebergSchema;

            // An empty Iceberg table either has a null snapshot, or a snapshot
            // with no data files; mock the null-snapshot case here.
            iTable.currentSnapshot();
            result = null;
        }
    };

    Map<ColumnRefOperator, Column> refToColumn = new HashMap<>();
    ColumnRefOperator bigintRef = new ColumnRefOperator(1000, Type.BIGINT, "col1", true);
    ColumnRefOperator arrayRef = new ColumnRefOperator(1001, Type.ARRAY_INT, "colArray", true);
    refToColumn.put(bigintRef, new Column("col1", Type.BIGINT));
    refToColumn.put(arrayRef, new Column("colArray", Type.ARRAY_INT));

    Statistics statistics =
            IcebergTableStatisticCalculator.getTableStatistics(null, iTable, refToColumn);
    Assert.assertNotNull(statistics);

    // The array column must yield a (possibly default/unknown) statistic rather
    // than triggering an "unknown error" during statistics calculation.
    ColumnStatistic arrayColumnStatistic = statistics.getColumnStatistic(arrayRef);
    Assert.assertNotNull(arrayColumnStatistic);
}
}

0 comments on commit 69efde3

Please sign in to comment.