Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e5d9754
Add nullability to FieldSpec and use it in TypeFactory
gortiz Oct 17, 2023
b7cc9aa
Change NullValueIndexType to skip not nullable columns
gortiz Oct 19, 2023
7f52f65
Enable NullValueIndexType by default in order to pass tests.
gortiz Oct 19, 2023
dcf1e33
Remove the assumption of being nullable for columns where defaultNull…
gortiz Oct 23, 2023
9d80da3
Remove unnecessary changes in TypeSystem
gortiz Oct 25, 2023
6d02a0e
Prepare test framework to add more tests
gortiz Oct 25, 2023
97660bb
Remove space
gortiz Oct 26, 2023
64ea0cd
Actually ignore test cases marked as ignored instead of not generati…
gortiz Oct 26, 2023
6a159ef
Add some null tests
gortiz Oct 26, 2023
c8105aa
Ignore some tests that started to fail once nullHandling.json was mod…
gortiz Oct 26, 2023
675601a
Fix false errors in test
gortiz Oct 26, 2023
8f108be
Honor EnableNullHandling in tests
gortiz Oct 31, 2023
9ae4edb
Add some extra tests
gortiz Oct 31, 2023
63146e7
Change nullable semantics to rely only on FieldSpec
gortiz Nov 3, 2023
e918c43
Adapt NullValueIndexTypeTest to not nullable FieldSpec.isNullable()
gortiz Nov 6, 2023
6843a42
Adapt TypeFactoryTest to nullable by default
gortiz Nov 6, 2023
698573c
Fix NullValueIndexTypeTest
gortiz Nov 6, 2023
fea29b8
Change QueryEnvironmentTestBase to be not nullable.
gortiz Nov 6, 2023
607c764
Small change to make ResourceBasedQueryPlansTest run faster by using …
gortiz Nov 6, 2023
e1a2e49
Add the concept of null handling at schema level
gortiz Nov 7, 2023
3352c10
Move NullHandling to its own class
gortiz Nov 7, 2023
6155ab6
Modify SegmentColumnarIndexCreator to support NullHandling
gortiz Nov 7, 2023
a1eebdf
Add license header
gortiz Nov 7, 2023
c77c480
Add javadoc on NullHandling.supportsV2
gortiz Nov 7, 2023
9d48652
Add TODO
gortiz Nov 7, 2023
242c35e
Adapt new columns empty columns to NullHandling
gortiz Nov 7, 2023
2df1298
Improve error message when no table config is present
gortiz Nov 7, 2023
a9ba1e4
Make Schema.Options and NullHandling Serializables
gortiz Nov 7, 2023
7d6adc2
Add options to special serialization method of Schema
gortiz Nov 7, 2023
e0d14e2
Configure test_null_handling.schema to use column mode
gortiz Nov 7, 2023
6f8e83a
Change SchemaTest.testSerializeDeserializeOptions to compare JsonNodes
gortiz Nov 8, 2023
5e1815d
Partial improvement in tests
gortiz Nov 8, 2023
11e597f
Simplify null handling options into a single boolean
gortiz Nov 10, 2023
ac5c5e5
Remove check that fails fast when running v2 with null handling using…
gortiz Nov 10, 2023
c4cd005
Merge remote-tracking branch 'origin/master' into schema-null-handling
gortiz Nov 10, 2023
9088835
Fix issue in SchemaTest
gortiz Nov 10, 2023
7331ba9
Add _enableColumnBasedNullHandling to equals
gortiz Nov 10, 2023
3785317
Fix some tests
gortiz Nov 14, 2023
bdfd595
Make `isNullable()` primitive boolean
gortiz Nov 14, 2023
ccf1ef0
Update Javadoc
gortiz Nov 15, 2023
802e9c4
Substitute `_nullable` with `_notNull`
gortiz Nov 15, 2023
cf7de13
Merge remote-tracking branch 'origin/master' into schema-null-handling
gortiz Nov 15, 2023
71d46af
Remove ignores in CountDistinct.json
gortiz Nov 15, 2023
17ec649
Ignore incorrect tests in CountDistinct.json
gortiz Nov 16, 2023
9e30389
Report ignored tests again
gortiz Nov 16, 2023
e73aac4
Remove parallel testng modifier
gortiz Nov 17, 2023
3dd0886
Remove test improvements not specific to this PR
gortiz Nov 17, 2023
7716fe0
Fix style
gortiz Nov 17, 2023
882aafd
Remove sql from assertions
gortiz Nov 17, 2023
9c2d47f
Update pinot-segment-local/src/main/java/org/apache/pinot/segment/loc…
gortiz Nov 17, 2023
9402a7c
Consider null index enabled also when enableColumnBasedNullHandling i…
gortiz Nov 17, 2023
3a5e3f6
Merge branch 'schema-null-handling' of github.com:gortiz/pinot into s…
gortiz Nov 17, 2023
278c11a
fix test
Nov 20, 2023
05234fe
Fix Mutable segment null handling, test, and minor cleanup
Jackie-Jiang Nov 20, 2023
c85b991
Fix lint
Jackie-Jiang Nov 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package org.apache.pinot.common.data;

import com.fasterxml.jackson.core.JsonProcessingException;
import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayList;
Expand Down Expand Up @@ -404,4 +406,47 @@ private String getRandomOrderJsonString(String[] fields) {
jsonString.append('}');
return jsonString.toString();
}

@DataProvider(name = "nullableCases")
public static Object[][] nullableCases() {
  // Each case is the value used for the "notNull" JSON attribute: absent (null), false or true.
  Boolean[] declaredValues = {null, false, true};
  Object[][] cases = new Object[declaredValues.length][];
  for (int i = 0; i < declaredValues.length; i++) {
    cases[i] = new Object[] {declaredValues[i]};
  }
  return cases;
}

@Test(dataProvider = "nullableCases")
void testNullability(Boolean declared)
    throws IOException {
  // Fields are nullable by default: only an explicit "notNull": true makes them not nullable.
  // Use equals() instead of `declared == Boolean.TRUE`, which relied on boxed-reference identity
  // (correct only because of the Boolean.valueOf cache, and a fragile idiom to copy elsewhere).
  boolean expectedNotNull = Boolean.TRUE.equals(declared);
  String json = declared == null
      ? "{\"name\": \"col1\", \"dataType\":\"BOOLEAN\"}"
      : "{\"name\": \"col1\", \"dataType\":\"BOOLEAN\", \"notNull\": " + declared + "}";
  DimensionFieldSpec fieldSpec = JsonUtils.stringToObject(json, DimensionFieldSpec.class);

  Assert.assertEquals(fieldSpec.isNotNull(), expectedNotNull,
      "Unexpected notNull read when declared as " + declared);
  Assert.assertEquals(fieldSpec.isNullable(), !expectedNotNull,
      "Unexpected nullable read when declared as " + declared);
}

@Test(dataProvider = "nullableCases")
void testNullabilityIdempotency(Boolean declared)
    throws JsonProcessingException {
  // Build the field spec JSON, omitting the "notNull" attribute when no value was declared.
  StringBuilder jsonBuilder = new StringBuilder("{\"name\": \"col1\", \"dataType\":\"BOOLEAN\"");
  if (declared != null) {
    jsonBuilder.append(", \"notNull\": ").append(declared);
  }
  jsonBuilder.append('}');
  DimensionFieldSpec fieldSpec = JsonUtils.stringToObject(jsonBuilder.toString(), DimensionFieldSpec.class);

  // Round-trip through JSON and verify nothing changed.
  String serialized = JsonUtils.objectToString(fieldSpec);
  DimensionFieldSpec deserialized = JsonUtils.stringToObject(serialized, DimensionFieldSpec.class);

  Assert.assertEquals(deserialized, fieldSpec, "Changes detected while checking serialize/deserialize idempotency");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
*/
package org.apache.pinot.common.data;

import com.fasterxml.jackson.databind.JsonNode;
import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.net.URL;
import java.sql.Timestamp;
Expand All @@ -33,6 +35,7 @@
import org.apache.pinot.spi.data.TimeGranularitySpec;
import org.apache.pinot.spi.data.TimeGranularitySpec.TimeFormat;
import org.apache.pinot.spi.utils.BytesUtils;
import org.apache.pinot.spi.utils.JsonUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
Expand Down Expand Up @@ -320,6 +323,29 @@ public void testSerializeDeserialize()
Assert.assertNotEquals(jsonSchemaToCompare, jsonSchema);
}

@Test
public void testSerializeDeserializeOptions()
    throws IOException {
  // Minimal schema JSON that enables column-based null handling.
  String json = "{\n"
      + " \"primaryKeyColumns\" : null,\n"
      + " \"timeFieldSpec\" : null,\n"
      + " \"schemaName\" : null,\n"
      + " \"enableColumnBasedNullHandling\" : true,\n"
      + " \"dimensionFieldSpecs\" : [ ],\n"
      + " \"metricFieldSpecs\" : [ ],\n"
      + " \"dateTimeFieldSpecs\" : [ ]\n"
      + "}";
  JsonNode expectedNode = JsonUtils.stringToJsonNode(json);
  Schema schema = JsonUtils.jsonNodeToObject(expectedNode, Schema.class);
  Assert.assertTrue(schema.isEnableColumnBasedNullHandling(), "Column null handling should be enabled");

  // Round-trip the schema through its JSON representation and verify nothing changed.
  Schema roundTripped = JsonUtils.stringToObject(JsonUtils.objectToString(schema), Schema.class);
  Assert.assertEquals(roundTripped, schema, "Changes detected while checking serialize/deserialize idempotency");
}

@Test
public void testSimpleDateFormat()
throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ public static void mergeWithOrdering(SelectionResultsBlock mergedBlock, Selectio

/**
* Build a {@link DataTable} from a {@link Collection} of selection rows with {@link DataSchema}. (Server side)
*
 * This method is allowed to modify the given rows. Specifically, it may remove null cells from them.
*/
public static DataTable getDataTableFromRows(Collection<Object[]> rows, DataSchema dataSchema,
boolean nullHandlingEnabled)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,3 @@
},
"schemaName": "mytable"
}

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.pinot.query.type;

import java.util.Map;
import java.util.function.Predicate;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
Expand All @@ -31,8 +32,8 @@
* Extends Java-base TypeFactory from Calcite.
*
* <p>{@link JavaTypeFactoryImpl} is used here because we are not overriding much of the TypeFactory methods
* required by Calcite. We will start extending {@link SqlTypeFactoryImpl} or even {@link RelDataTypeFactory}
* when necessary for Pinot to override such mechanism.
* required by Calcite. We will start extending {@link org.apache.calcite.sql.type.SqlTypeFactoryImpl} or even
* {@link org.apache.calcite.rel.type.RelDataTypeFactory} when necessary for Pinot to override such mechanism.
*
* <p>Noted that {@link JavaTypeFactoryImpl} is subject to change. Please pay extra attention to this class when
* upgrading Calcite versions.
Expand All @@ -45,20 +46,36 @@ public TypeFactory(RelDataTypeSystem typeSystem) {

public RelDataType createRelDataTypeFromSchema(Schema schema) {
  // When column-based null handling is disabled, every column is treated as not nullable,
  // regardless of what the individual FieldSpec declares.
  Predicate<FieldSpec> isNullable = schema.isEnableColumnBasedNullHandling()
      ? FieldSpec::isNullable
      : fieldSpec -> false;
  Builder builder = new Builder(this);
  schema.getFieldSpecMap()
      .forEach((columnName, fieldSpec) -> builder.add(columnName, toRelDataType(fieldSpec, isNullable)));
  return builder.build();
}

private RelDataType toRelDataType(FieldSpec fieldSpec) {
private RelDataType toRelDataType(FieldSpec fieldSpec, Predicate<FieldSpec> isNullable) {
RelDataType type = createSqlType(getSqlTypeName(fieldSpec));
boolean isArray = !fieldSpec.isSingleValueField();
if (isArray) {
type = createArrayType(type, -1);
}
if (isNullable.test(fieldSpec)) {
type = createTypeWithNullability(type, true);
}
Comment on lines +64 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we consider isNullable flag here refer to the array itself or this is refering to the element inside the array can be null?
it is interesting b/c some DB consider array to be null <=> empty; but postgres do not (e.g. can have null array)
let's make sure we document it properly here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most databases do not correctly support arrays (ie mysql 6.x). Postgres, on the other hand, has a sane type system (that even supports inheritance!). AFAIK when a column of type array is nullable it means the array itself is nullable. I don't know if each element in the array can be tuned nullable or not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah agreed. as long as we document & best effort align with postgers it should be good :-D

return type;
}

private SqlTypeName getSqlTypeName(FieldSpec fieldSpec) {
switch (fieldSpec.getDataType()) {
case INT:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.INTEGER)
: createArrayType(createSqlType(SqlTypeName.INTEGER), -1);
return SqlTypeName.INTEGER;
case LONG:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.BIGINT)
: createArrayType(createSqlType(SqlTypeName.BIGINT), -1);
return SqlTypeName.BIGINT;
// Map float and double to the same RelDataType so that queries like
// `select count(*) from table where aFloatColumn = 0.05` works correctly in multi-stage query engine.
//
Expand All @@ -71,34 +88,32 @@ private RelDataType toRelDataType(FieldSpec fieldSpec) {
// With float and double mapped to the same RelDataType, the behavior in multi-stage query engine will be the same
// as the query in v1 query engine.
case FLOAT:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.DOUBLE)
: createArrayType(createSqlType(SqlTypeName.REAL), -1);
if (fieldSpec.isSingleValueField()) {
return SqlTypeName.DOUBLE;
} else {
// TODO: This may be wrong. The reason why we want to use DOUBLE in single value float may also apply here
return SqlTypeName.REAL;
}
case DOUBLE:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.DOUBLE)
: createArrayType(createSqlType(SqlTypeName.DOUBLE), -1);
return SqlTypeName.DOUBLE;
case BOOLEAN:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.BOOLEAN)
: createArrayType(createSqlType(SqlTypeName.BOOLEAN), -1);
return SqlTypeName.BOOLEAN;
case TIMESTAMP:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.TIMESTAMP)
: createArrayType(createSqlType(SqlTypeName.TIMESTAMP), -1);
return SqlTypeName.TIMESTAMP;
case STRING:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.VARCHAR)
: createArrayType(createSqlType(SqlTypeName.VARCHAR), -1);
return SqlTypeName.VARCHAR;
case BYTES:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.VARBINARY)
: createArrayType(createSqlType(SqlTypeName.VARBINARY), -1);
return SqlTypeName.VARBINARY;
case BIG_DECIMAL:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.DECIMAL)
: createArrayType(createSqlType(SqlTypeName.DECIMAL), -1);
return SqlTypeName.DECIMAL;
case JSON:
return createSqlType(SqlTypeName.VARCHAR);
return SqlTypeName.VARCHAR;
case LIST:
// TODO: support LIST, MV column should go fall into this category.
case STRUCT:
case MAP:
default:
String message = String.format("Unsupported type: %s ", fieldSpec.getDataType().toString());
String message = String.format("Unsupported type: %s ", fieldSpec.getDataType());
throw new UnsupportedOperationException(message);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@ public class ResourceBasedQueryPlansTest extends QueryEnvironmentTestBase {
private static final String FILE_FILTER_PROPERTY = "pinot.fileFilter";

@Test(dataProvider = "testResourceQueryPlannerTestCaseProviderHappyPath")
public void testQueryExplainPlansAndQueryPlanConversion(String testCaseName, String query, String output) {
public void testQueryExplainPlansAndQueryPlanConversion(String testCaseName, String description, String query,
String output) {
try {
long requestId = RANDOM_REQUEST_ID_GEN.nextLong();
String explainedPlan = _queryEnvironment.explainQuery(query, requestId);
Assert.assertEquals(explainedPlan, output,
String.format("Test case %s for query %s doesn't match expected output: %s", testCaseName, query, output));
String.format("Test case %s for query %s (%s) doesn't match expected output: %s", testCaseName, description,
query, output));
// use a regex to exclude the
String queryWithoutExplainPlan = query.replaceFirst(EXPLAIN_REGEX, "");
DispatchableSubPlan dispatchableSubPlan = _queryEnvironment.planQuery(queryWithoutExplainPlan);
Expand Down Expand Up @@ -105,7 +107,7 @@ private static Object[][] testResourceQueryPlannerTestCaseProviderHappyPath()
String sql = queryCase._sql;
List<String> orgOutput = queryCase._output;
String concatenatedOutput = StringUtils.join(orgOutput, "");
Object[] testEntry = new Object[]{testCaseName, sql, concatenatedOutput};
Object[] testEntry = new Object[]{testCaseName, queryCase._description, sql, concatenatedOutput};
providerContent.add(testEntry);
}
}
Expand Down
Loading