Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e5d9754
Add nullability to FieldSpec and use it in TypeFactory
gortiz Oct 17, 2023
b7cc9aa
Change NullValueIndexType to skip not nullable columns
gortiz Oct 19, 2023
7f52f65
Enable NullValueIndexType by default in order to pass tests.
gortiz Oct 19, 2023
dcf1e33
Remove the assumption of being nullable for columns where defaultNull…
gortiz Oct 23, 2023
9d80da3
Remove unnecessary changes in TypeSystem
gortiz Oct 25, 2023
6d02a0e
Prepare test framework to add more tests
gortiz Oct 25, 2023
97660bb
Remove space
gortiz Oct 26, 2023
64ea0cd
Actually ignore test cases marked as ignored instead of not generati…
gortiz Oct 26, 2023
6a159ef
Add some null tests
gortiz Oct 26, 2023
c8105aa
Ignore some tests that started to fail once nullHandling.json was mod…
gortiz Oct 26, 2023
675601a
Fix false errors in test
gortiz Oct 26, 2023
8f108be
Honor EnableNullHandling in tests
gortiz Oct 31, 2023
9ae4edb
Add some extra tests
gortiz Oct 31, 2023
63146e7
Change nullable semantics to rely only on FieldSpec
gortiz Nov 3, 2023
e918c43
Adapt NullValueIndexTypeTest to not nullable FieldSpec.isNullable()
gortiz Nov 6, 2023
6843a42
Adapt TypeFactoryTest to nullable by default
gortiz Nov 6, 2023
698573c
Fix NullValueIndexTypeTest
gortiz Nov 6, 2023
fea29b8
Change QueryEnvironmentTestBase to be not nullable.
gortiz Nov 6, 2023
607c764
Small change to make ResourceBasedQueryPlansTest run faster by using …
gortiz Nov 6, 2023
e1a2e49
Add the concept of null handling at schema level
gortiz Nov 7, 2023
3352c10
Move NullHandling to its own class
gortiz Nov 7, 2023
6155ab6
Modify SegmentColumnarIndexCreator to support NullHandling
gortiz Nov 7, 2023
a1eebdf
Add license header
gortiz Nov 7, 2023
c77c480
Add javadoc on NullHandling.supportsV2
gortiz Nov 7, 2023
9d48652
Add TODO
gortiz Nov 7, 2023
242c35e
Adapt new columns empty columns to NullHandling
gortiz Nov 7, 2023
2df1298
Improve error message when no table config is present
gortiz Nov 7, 2023
a9ba1e4
Make Schema.Options and NullHandling Serializables
gortiz Nov 7, 2023
7d6adc2
Add options to special serialization method of Schema
gortiz Nov 7, 2023
e0d14e2
Configure test_null_handling.schema to use column mode
gortiz Nov 7, 2023
6f8e83a
Change SchemaTest.testSerializeDeserializeOptions to compare JsonNodes
gortiz Nov 8, 2023
5e1815d
Partial improvement in tests
gortiz Nov 8, 2023
11e597f
Simplify null handling options into a single boolean
gortiz Nov 10, 2023
ac5c5e5
Remove check that fails fast when running v2 with null handling using…
gortiz Nov 10, 2023
c4cd005
Merge remote-tracking branch 'origin/master' into schema-null-handling
gortiz Nov 10, 2023
9088835
Fix issue in SchemaTest
gortiz Nov 10, 2023
7331ba9
Add _enableColumnBasedNullHandling to equals
gortiz Nov 10, 2023
3785317
Fix some tests
gortiz Nov 14, 2023
bdfd595
Make `isNullable()` primitive boolean
gortiz Nov 14, 2023
ccf1ef0
Update Javadoc
gortiz Nov 15, 2023
802e9c4
Substitute `_nullable` with `_notNull`
gortiz Nov 15, 2023
cf7de13
Merge remote-tracking branch 'origin/master' into schema-null-handling
gortiz Nov 15, 2023
71d46af
Remove ignores in CountDistinct.json
gortiz Nov 15, 2023
17ec649
Ignore incorrect tests in CountDistinct.json
gortiz Nov 16, 2023
9e30389
Report ignored tests again
gortiz Nov 16, 2023
e73aac4
Remove parallel testng modifier
gortiz Nov 17, 2023
3dd0886
Remove test improvements not specific to this PR
gortiz Nov 17, 2023
7716fe0
Fix style
gortiz Nov 17, 2023
882aafd
Remove sql from assertions
gortiz Nov 17, 2023
9c2d47f
Update pinot-segment-local/src/main/java/org/apache/pinot/segment/loc…
gortiz Nov 17, 2023
9402a7c
Consider null index enabled also when enableColumnBasedNullHandling i…
gortiz Nov 17, 2023
3a5e3f6
Merge branch 'schema-null-handling' of github.com:gortiz/pinot into s…
gortiz Nov 17, 2023
278c11a
fix test
Nov 20, 2023
05234fe
Fix Mutable segment null handling, test, and minor cleanup
Jackie-Jiang Nov 20, 2023
c85b991
Fix lint
Jackie-Jiang Nov 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package org.apache.pinot.common.data;

import com.fasterxml.jackson.core.JsonProcessingException;
import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayList;
Expand Down Expand Up @@ -404,4 +406,47 @@ private String getRandomOrderJsonString(String[] fields) {
jsonString.append('}');
return jsonString.toString();
}

@DataProvider(name = "nullableCases")
public static Object[][] nullableCases() {
  // Each case is the value used for the "notNull" JSON attribute: absent (null), false or true.
  Boolean[] declaredValues = {null, false, true};
  Object[][] cases = new Object[declaredValues.length][];
  for (int i = 0; i < declaredValues.length; i++) {
    cases[i] = new Object[] {declaredValues[i]};
  }
  return cases;
}

@Test(dataProvider = "nullableCases")
void testNullability(Boolean declared)
    throws IOException {
  // Fields are nullable by default: only an explicit "notNull": true makes them not nullable.
  // Use equals() instead of `declared == Boolean.TRUE`, which relied on boxed-reference identity
  // (correct only because of the Boolean.valueOf cache, and a fragile idiom to copy elsewhere).
  boolean expectedNotNull = Boolean.TRUE.equals(declared);
  String json = declared == null
      ? "{\"name\": \"col1\", \"dataType\":\"BOOLEAN\"}"
      : "{\"name\": \"col1\", \"dataType\":\"BOOLEAN\", \"notNull\": " + declared + "}";
  DimensionFieldSpec fieldSpec = JsonUtils.stringToObject(json, DimensionFieldSpec.class);

  Assert.assertEquals(fieldSpec.isNotNull(), expectedNotNull,
      "Unexpected notNull read when declared as " + declared);
  Assert.assertEquals(fieldSpec.isNullable(), !expectedNotNull,
      "Unexpected nullable read when declared as " + declared);
}

@Test(dataProvider = "nullableCases")
void testNullabilityIdempotency(Boolean declared)
    throws JsonProcessingException {
  // Build the field spec JSON, omitting the "notNull" attribute when no value was declared.
  StringBuilder jsonBuilder = new StringBuilder("{\"name\": \"col1\", \"dataType\":\"BOOLEAN\"");
  if (declared != null) {
    jsonBuilder.append(", \"notNull\": ").append(declared);
  }
  jsonBuilder.append('}');
  DimensionFieldSpec fieldSpec = JsonUtils.stringToObject(jsonBuilder.toString(), DimensionFieldSpec.class);

  // Round-trip through JSON and verify nothing changed.
  String serialized = JsonUtils.objectToString(fieldSpec);
  DimensionFieldSpec deserialized = JsonUtils.stringToObject(serialized, DimensionFieldSpec.class);

  Assert.assertEquals(deserialized, fieldSpec, "Changes detected while checking serialize/deserialize idempotency");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
*/
package org.apache.pinot.common.data;

import com.fasterxml.jackson.databind.JsonNode;
import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.net.URL;
import java.sql.Timestamp;
Expand All @@ -33,6 +35,7 @@
import org.apache.pinot.spi.data.TimeGranularitySpec;
import org.apache.pinot.spi.data.TimeGranularitySpec.TimeFormat;
import org.apache.pinot.spi.utils.BytesUtils;
import org.apache.pinot.spi.utils.JsonUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
Expand Down Expand Up @@ -320,6 +323,29 @@ public void testSerializeDeserialize()
Assert.assertNotEquals(jsonSchemaToCompare, jsonSchema);
}

@Test
public void testSerializeDeserializeOptions()
    throws IOException {
  // Minimal schema JSON that enables column-based null handling.
  String json = "{\n"
      + " \"primaryKeyColumns\" : null,\n"
      + " \"timeFieldSpec\" : null,\n"
      + " \"schemaName\" : null,\n"
      + " \"enableColumnBasedNullHandling\" : true,\n"
      + " \"dimensionFieldSpecs\" : [ ],\n"
      + " \"metricFieldSpecs\" : [ ],\n"
      + " \"dateTimeFieldSpecs\" : [ ]\n"
      + "}";
  JsonNode expectedNode = JsonUtils.stringToJsonNode(json);
  Schema schema = JsonUtils.jsonNodeToObject(expectedNode, Schema.class);
  Assert.assertTrue(schema.isEnableColumnBasedNullHandling(), "Column null handling should be enabled");

  // Round-trip the schema through its JSON representation and verify nothing changed.
  Schema roundTripped = JsonUtils.stringToObject(JsonUtils.objectToString(schema), Schema.class);
  Assert.assertEquals(roundTripped, schema, "Changes detected while checking serialize/deserialize idempotency");
}

@Test
public void testSimpleDateFormat()
throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ public static void mergeWithOrdering(SelectionResultsBlock mergedBlock, Selectio

/**
* Build a {@link DataTable} from a {@link Collection} of selection rows with {@link DataSchema}. (Server side)
*
 * This method is allowed to modify the given rows. Specifically, it may remove null cells from them.
*/
public static DataTable getDataTableFromRows(Collection<Object[]> rows, DataSchema dataSchema,
boolean nullHandlingEnabled)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,3 @@
},
"schemaName": "mytable"
}

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.pinot.query.type;

import java.util.Map;
import java.util.function.Predicate;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
Expand All @@ -31,8 +32,8 @@
* Extends Java-base TypeFactory from Calcite.
*
* <p>{@link JavaTypeFactoryImpl} is used here because we are not overriding much of the TypeFactory methods
* required by Calcite. We will start extending {@link SqlTypeFactoryImpl} or even {@link RelDataTypeFactory}
* when necessary for Pinot to override such mechanism.
* required by Calcite. We will start extending {@link org.apache.calcite.sql.type.SqlTypeFactoryImpl} or even
* {@link org.apache.calcite.rel.type.RelDataTypeFactory} when necessary for Pinot to override such mechanism.
*
* <p>Noted that {@link JavaTypeFactoryImpl} is subject to change. Please pay extra attention to this class when
* upgrading Calcite versions.
Expand All @@ -45,20 +46,36 @@ public TypeFactory(RelDataTypeSystem typeSystem) {

public RelDataType createRelDataTypeFromSchema(Schema schema) {
  // When column-based null handling is disabled, every column is treated as not nullable,
  // regardless of what the individual FieldSpec declares.
  Predicate<FieldSpec> isNullable = schema.isEnableColumnBasedNullHandling()
      ? FieldSpec::isNullable
      : fieldSpec -> false;
  Builder builder = new Builder(this);
  schema.getFieldSpecMap()
      .forEach((columnName, fieldSpec) -> builder.add(columnName, toRelDataType(fieldSpec, isNullable)));
  return builder.build();
}

private RelDataType toRelDataType(FieldSpec fieldSpec) {
private RelDataType toRelDataType(FieldSpec fieldSpec, Predicate<FieldSpec> isNullable) {
RelDataType type = createSqlType(getSqlTypeName(fieldSpec));
boolean isArray = !fieldSpec.isSingleValueField();
if (isArray) {
type = createArrayType(type, -1);
}
if (isNullable.test(fieldSpec)) {
type = createTypeWithNullability(type, true);
}
Comment on lines +64 to +69
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we consider isNullable flag here refer to the array itself or this is refering to the element inside the array can be null?
it is interesting b/c some DB consider array to be null <=> empty; but postgres do not (e.g. can have null array)
let's make sure we document it properly here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most databases do not correctly support arrays (ie mysql 6.x). Postgres, on the other hand, has a sane type system (that even supports inheritance!). AFAIK when a column of type array is nullable it means the array itself is nullable. I don't know if each element in the array can be tuned nullable or not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah agreed. as long as we document & best effort align with postgers it should be good :-D

return type;
}

private SqlTypeName getSqlTypeName(FieldSpec fieldSpec) {
switch (fieldSpec.getDataType()) {
case INT:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.INTEGER)
: createArrayType(createSqlType(SqlTypeName.INTEGER), -1);
return SqlTypeName.INTEGER;
case LONG:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.BIGINT)
: createArrayType(createSqlType(SqlTypeName.BIGINT), -1);
return SqlTypeName.BIGINT;
// Map float and double to the same RelDataType so that queries like
// `select count(*) from table where aFloatColumn = 0.05` works correctly in multi-stage query engine.
//
Expand All @@ -71,34 +88,32 @@ private RelDataType toRelDataType(FieldSpec fieldSpec) {
// With float and double mapped to the same RelDataType, the behavior in multi-stage query engine will be the same
// as the query in v1 query engine.
case FLOAT:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.DOUBLE)
: createArrayType(createSqlType(SqlTypeName.REAL), -1);
if (fieldSpec.isSingleValueField()) {
return SqlTypeName.DOUBLE;
} else {
// TODO: This may be wrong. The reason why we want to use DOUBLE in single value float may also apply here
return SqlTypeName.REAL;
}
case DOUBLE:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.DOUBLE)
: createArrayType(createSqlType(SqlTypeName.DOUBLE), -1);
return SqlTypeName.DOUBLE;
case BOOLEAN:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.BOOLEAN)
: createArrayType(createSqlType(SqlTypeName.BOOLEAN), -1);
return SqlTypeName.BOOLEAN;
case TIMESTAMP:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.TIMESTAMP)
: createArrayType(createSqlType(SqlTypeName.TIMESTAMP), -1);
return SqlTypeName.TIMESTAMP;
case STRING:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.VARCHAR)
: createArrayType(createSqlType(SqlTypeName.VARCHAR), -1);
return SqlTypeName.VARCHAR;
case BYTES:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.VARBINARY)
: createArrayType(createSqlType(SqlTypeName.VARBINARY), -1);
return SqlTypeName.VARBINARY;
case BIG_DECIMAL:
return fieldSpec.isSingleValueField() ? createSqlType(SqlTypeName.DECIMAL)
: createArrayType(createSqlType(SqlTypeName.DECIMAL), -1);
return SqlTypeName.DECIMAL;
case JSON:
return createSqlType(SqlTypeName.VARCHAR);
return SqlTypeName.VARCHAR;
case LIST:
// TODO: support LIST, MV column should go fall into this category.
case STRUCT:
case MAP:
default:
String message = String.format("Unsupported type: %s ", fieldSpec.getDataType().toString());
String message = String.format("Unsupported type: %s ", fieldSpec.getDataType());
throw new UnsupportedOperationException(message);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@ public class ResourceBasedQueryPlansTest extends QueryEnvironmentTestBase {
private static final String FILE_FILTER_PROPERTY = "pinot.fileFilter";

@Test(dataProvider = "testResourceQueryPlannerTestCaseProviderHappyPath")
public void testQueryExplainPlansAndQueryPlanConversion(String testCaseName, String query, String output) {
public void testQueryExplainPlansAndQueryPlanConversion(String testCaseName, String description, String query,
String output) {
try {
long requestId = RANDOM_REQUEST_ID_GEN.nextLong();
String explainedPlan = _queryEnvironment.explainQuery(query, requestId);
Assert.assertEquals(explainedPlan, output,
String.format("Test case %s for query %s doesn't match expected output: %s", testCaseName, query, output));
String.format("Test case %s for query %s (%s) doesn't match expected output: %s", testCaseName, description,
query, output));
// use a regex to exclude the
String queryWithoutExplainPlan = query.replaceFirst(EXPLAIN_REGEX, "");
DispatchableSubPlan dispatchableSubPlan = _queryEnvironment.planQuery(queryWithoutExplainPlan);
Expand Down Expand Up @@ -105,7 +107,7 @@ private static Object[][] testResourceQueryPlannerTestCaseProviderHappyPath()
String sql = queryCase._sql;
List<String> orgOutput = queryCase._output;
String concatenatedOutput = StringUtils.join(orgOutput, "");
Object[] testEntry = new Object[]{testCaseName, sql, concatenatedOutput};
Object[] testEntry = new Object[]{testCaseName, queryCase._description, sql, concatenatedOutput};
providerContent.add(testEntry);
}
}
Expand Down
Loading