apache · Jackie-Jiang · Mar 30, 2023 · Mar 23, 2023 · Mar 28, 2023 · Mar 28, 2023
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metadata/MetadataEqualsHashCodeTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metadata/MetadataEqualsHashCodeTest.java
@@ -39,15 +39,18 @@ public class MetadataEqualsHashCodeTest {
   public void testEqualsAndHashCode() {
     EqualsVerifier.forClass(InstanceZKMetadata.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
         .usingGetClass().verify();
-    EqualsVerifier.forClass(Schema.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS).usingGetClass()
+    EqualsVerifier.forClass(DimensionFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
+        .usingGetClass().verify();
+    EqualsVerifier.forClass(MetricFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
+        .usingGetClass().verify();
+    EqualsVerifier.forClass(TimeFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS).usingGetClass()
+        .verify();
+    EqualsVerifier.forClass(DateTimeFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
+        .usingGetClass().verify();
+    // NOTE: Suppress Warning.ALL_FIELDS_SHOULD_BE_USED because some fields are derived from other fields, but we cannot
+    //       declare them as transitive because they are still needed after ser/de
+    EqualsVerifier.forClass(Schema.class)
+        .suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.ALL_FIELDS_SHOULD_BE_USED).usingGetClass()
         .verify();
-    EqualsVerifier.forClass(DimensionFieldSpec.class)
-        .suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
-    EqualsVerifier.forClass(MetricFieldSpec.class)
-        .suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
-    EqualsVerifier.forClass(TimeFieldSpec.class)
-        .suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
-    EqualsVerifier.forClass(DateTimeFieldSpec.class)
-        .suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
   }
 }
diff --git a/...-controller/src/test/java/org/apache/pinot/controller/utils/SegmentMetadataMockUtils.java b/...-controller/src/test/java/org/apache/pinot/controller/utils/SegmentMetadataMockUtils.java
@@ -19,9 +19,8 @@
 package org.apache.pinot.controller.utils;
 
 import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
 import org.apache.pinot.segment.spi.ColumnMetadata;
@@ -87,7 +86,7 @@ public static SegmentMetadata mockSegmentMetadataWithPartitionInfo(String tableN
     when(segmentMetadata.getName()).thenReturn(segmentName);
     when(segmentMetadata.getCrc()).thenReturn("0");
 
-    Map<String, ColumnMetadata> columnMetadataMap = new HashMap<>();
+    TreeMap<String, ColumnMetadata> columnMetadataMap = new TreeMap<>();
     columnMetadataMap.put(columnName, columnMetadata);
     when(segmentMetadata.getColumnMetadataMap()).thenReturn(columnMetadataMap);
     return segmentMetadata;

diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/CrcUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/CrcUtilsTest.java
@@ -19,71 +19,49 @@
 package org.apache.pinot.core.util;
 
 import java.io.File;
+import java.net.URL;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.io.FileUtils;
-import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
 import org.apache.pinot.segment.local.segment.creator.SegmentTestUtils;
-import org.apache.pinot.segment.local.segment.creator.impl.SegmentCreationDriverFactory;
+import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.index.converter.SegmentV1V2ToV3FormatConverter;
 import org.apache.pinot.segment.local.utils.CrcUtils;
-import org.apache.pinot.segment.spi.IndexSegment;
-import org.apache.pinot.segment.spi.SegmentMetadata;
 import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
 import org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver;
-import org.apache.pinot.spi.utils.ReadMode;
 import org.apache.pinot.util.TestUtils;
-import org.testng.Assert;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
 
-/**
- * Dec 4, 2014
- */
 
 public class CrcUtilsTest {
-
+  private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "CrcUtilsTest");
   private static final String AVRO_DATA = "data/test_data-mv.avro";
-  private static final File INDEX_DIR = new File("/tmp/testingCrc");
+  private static final long EXPECTED_V1_CRC = 4139425029L;
+  private static final long EXPECTED_V3_CRC = 94877217L;
 
   @Test
-  public void test1()
-      throws Exception {
-    if (INDEX_DIR.exists()) {
-      FileUtils.deleteQuietly(INDEX_DIR);
-    }
-
-    final CrcUtils u1 = CrcUtils.forAllFilesInFolder(new File(makeSegmentAndReturnPath()));
-    final long crc1 = u1.computeCrc();
-    final String md51 = u1.computeMD5();
-
-    FileUtils.deleteQuietly(INDEX_DIR);
-
-    final CrcUtils u2 = CrcUtils.forAllFilesInFolder(new File(makeSegmentAndReturnPath()));
-    final long crc2 = u2.computeCrc();
-    final String md52 = u2.computeMD5();
-
-    Assert.assertEquals(crc1, crc2);
-    Assert.assertEquals(md51, md52);
-
-    FileUtils.deleteQuietly(INDEX_DIR);
-
-    final IndexSegment segment = ImmutableSegmentLoader.load(new File(makeSegmentAndReturnPath()), ReadMode.mmap);
-    final SegmentMetadata m = segment.getSegmentMetadata();
-
-    FileUtils.deleteQuietly(INDEX_DIR);
-  }
-
-  private String makeSegmentAndReturnPath()
+  public void testCrc()
       throws Exception {
-    final String filePath = TestUtils.getFileFromResourceUrl(CrcUtils.class.getClassLoader().getResource(AVRO_DATA));
-
-    final SegmentGeneratorConfig config = SegmentTestUtils
-        .getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS,
-            "testTable");
-    config.setSegmentNamePostfix("1");
-    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
+    FileUtils.deleteDirectory(INDEX_DIR);
+
+    URL resource = getClass().getClassLoader().getResource(AVRO_DATA);
+    assertNotNull(resource);
+    String filePath = TestUtils.getFileFromResourceUrl(resource);
+    SegmentGeneratorConfig config =
+        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
+            TimeUnit.DAYS, "testTable");
+    SegmentIndexCreationDriver driver = new SegmentIndexCreationDriverImpl();
     driver.init(config);
     driver.build();
 
-    return new File(INDEX_DIR, driver.getSegmentName()).getAbsolutePath();
+    File indexDir = driver.getOutputDirectory();
+    assertEquals(CrcUtils.forAllFilesInFolder(indexDir).computeCrc(), EXPECTED_V1_CRC);
+
+    new SegmentV1V2ToV3FormatConverter().convert(indexDir);
+    assertEquals(CrcUtils.forAllFilesInFolder(indexDir).computeCrc(), EXPECTED_V3_CRC);
+
+    FileUtils.deleteDirectory(INDEX_DIR);
   }
 }
diff --git a/...tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/...tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -138,9 +138,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
   private static final String COLUMN_LENGTH_MAP_KEY = "columnLengthMap";
   private static final String COLUMN_CARDINALITY_MAP_KEY = "columnCardinalityMap";
   private static final String MAX_NUM_MULTI_VALUES_MAP_KEY = "maxNumMultiValuesMap";
-  // TODO: This might lead to flaky test, as this disk size is not deterministic
-  //       as it depends on the iteration order of a HashSet.
-  private static final int DISK_SIZE_IN_BYTES = 20797128;
+  private static final int DISK_SIZE_IN_BYTES = 20797327;
   private static final int NUM_ROWS = 115545;
 
   private final List<ServiceStatus.ServiceStatusCallback> _serviceStatusCallbacks =
@@ -518,12 +516,7 @@ public void testInvertedIndexTriggering()
     // download, the original segment dir is deleted and then replaced with the newly downloaded segment, leaving a
     // small chance of race condition between getting table size check and replacing the segment dir, i.e. flaky test.
     addInvertedIndex();
-    // The table size gets larger for sure, but it does not necessarily equal to tableSizeWithNewIndex, because the
-    // order of entries in the index_map file can change when the raw segment adds/deletes indices back and forth.
-    long tableSizeAfterAddIndex = getTableSize(getTableName());
-    assertTrue(tableSizeAfterAddIndex > tableSizeAfterReloadSegment,
-        String.format("Table size: %d should increase after adding inverted index on segment: %s, as compared with %d",
-            tableSizeAfterAddIndex, segmentName, tableSizeAfterReloadSegment));
+    assertEquals(getTableSize(getTableName()), tableSizeWithNewIndex);
 
     // Force to download the whole table and use the original table config, so the disk usage should get back to
     // initial value.

diff --git a/.../src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java b/.../src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java
@@ -88,8 +88,11 @@ private org.apache.avro.Schema extractAvroSchemaFromFile(String inputPath) {
   }
 
   private void validateSchemas() {
-    for (String columnName : _pinotSchema.getPhysicalColumnNames()) {
-      FieldSpec fieldSpec = _pinotSchema.getFieldSpecFor(columnName);
+    for (FieldSpec fieldSpec : _pinotSchema.getAllFieldSpecs()) {
+      if (fieldSpec.isVirtualColumn()) {
+        continue;
+      }
+      String columnName = fieldSpec.getName();
       org.apache.avro.Schema.Field avroColumnField = _avroSchema.getField(columnName);
       if (avroColumnField == null) {
         _missingPinotColumn.addMismatchReason(String

diff --git a/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java b/pinot-query-planner/src/test/java/org/apache/pinot/query/QueryCompilationTest.java
@@ -290,24 +290,58 @@ private Object[][] provideQueriesWithException() {
 
   @DataProvider(name = "testQueryPlanDataProvider")
   private Object[][] provideQueriesWithExplainedPlan() {
+    //@formatter:off
     return new Object[][] {
-        new Object[]{"EXPLAIN PLAN INCLUDING ALL ATTRIBUTES AS JSON FOR SELECT col1, col3 FROM a", "{\n"
-            + "  \"rels\": [\n" + "    {\n" + "      \"id\": \"0\",\n" + "      \"relOp\": \"LogicalTableScan\",\n"
-            + "      \"table\": [\n" + "        \"a\"\n" + "      ],\n" + "      \"inputs\": []\n" + "    },\n"
-            + "    {\n" + "      \"id\": \"1\",\n" + "      \"relOp\": \"LogicalProject\",\n" + "      \"fields\": [\n"
-            + "        \"col1\",\n" + "        \"col3\"\n" + "      ],\n" + "      \"exprs\": [\n" + "        {\n"
-            + "          \"input\": 3,\n" + "          \"name\": \"$3\"\n" + "        },\n" + "        {\n"
-            + "          \"input\": 2,\n" + "          \"name\": \"$2\"\n" + "        }\n" + "      ]\n" + "    }\n"
-            + "  ]\n" + "}"},
+        new Object[]{"EXPLAIN PLAN INCLUDING ALL ATTRIBUTES AS JSON FOR SELECT col1, col3 FROM a",
+              "{\n"
+            + "  \"rels\": [\n"
+            + "    {\n"
+            + "      \"id\": \"0\",\n"
+            + "      \"relOp\": \"LogicalTableScan\",\n"
+            + "      \"table\": [\n"
+            + "        \"a\"\n"
+            + "      ],\n"
+            + "      \"inputs\": []\n"
+            + "    },\n"
+            + "    {\n"
+            + "      \"id\": \"1\",\n"
+            + "      \"relOp\": \"LogicalProject\",\n"
+            + "      \"fields\": [\n"
+            + "        \"col1\",\n"
+            + "        \"col3\"\n"
+            + "      ],\n"
+            + "      \"exprs\": [\n"
+            + "        {\n"
+            + "          \"input\": 0,\n"
+            + "          \"name\": \"$0\"\n"
+            + "        },\n"
+            + "        {\n"
+            + "          \"input\": 2,\n"
+            + "          \"name\": \"$2\"\n"
+            + "        }\n"
+            + "      ]\n"
+            + "    }\n"
+            + "  ]\n"
+            + "}"},
         new Object[]{"EXPLAIN PLAN EXCLUDING ATTRIBUTES AS DOT FOR SELECT col1, COUNT(*) FROM a GROUP BY col1",
-            "Execution Plan\n" + "digraph {\n" + "\"LogicalExchange\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n"
-                + "\"LogicalAggregate\\n\" -> \"LogicalExchange\\n\" [label=\"0\"]\n"
-                + "\"LogicalTableScan\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n" + "}\n"},
-        new Object[]{"EXPLAIN PLAN FOR SELECT a.col1, b.col3 FROM a JOIN b ON a.col1 = b.col1", "Execution Plan\n"
-            + "LogicalProject(col1=[$0], col3=[$1])\n" + "  LogicalJoin(condition=[=($0, $2)], joinType=[inner])\n"
-            + "    LogicalExchange(distribution=[hash[0]])\n" + "      LogicalProject(col1=[$3])\n"
-            + "        LogicalTableScan(table=[[a]])\n" + "    LogicalExchange(distribution=[hash[1]])\n"
-            + "      LogicalProject(col3=[$2], col1=[$3])\n" + "        LogicalTableScan(table=[[b]])\n"},
+              "Execution Plan\n"
+            + "digraph {\n"
+            + "\"LogicalExchange\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n"
+            + "\"LogicalAggregate\\n\" -> \"LogicalExchange\\n\" [label=\"0\"]\n"
+            + "\"LogicalTableScan\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n"
+            + "}\n"},
+        new Object[]{"EXPLAIN PLAN FOR SELECT a.col1, b.col3 FROM a JOIN b ON a.col1 = b.col1",
+              "Execution Plan\n"
+            + "LogicalProject(col1=[$0], col3=[$2])\n"
+            + "  LogicalJoin(condition=[=($0, $1)], joinType=[inner])\n"
+            + "    LogicalExchange(distribution=[hash[0]])\n"
+            + "      LogicalProject(col1=[$0])\n"
+            + "        LogicalTableScan(table=[[a]])\n"
+            + "    LogicalExchange(distribution=[hash[0]])\n"
+            + "      LogicalProject(col1=[$0], col3=[$2])\n"
+            + "        LogicalTableScan(table=[[b]])\n"
+        },
     };
+    //@formatter:on
   }
 }
diff --git a/pinot-query-planner/src/test/resources/queries/AggregatePlans.json b/pinot-query-planner/src/test/resources/queries/AggregatePlans.json
@@ -9,8 +9,8 @@
           "\nLogicalProject(avg=[/(CASE(=($1, 0), null:DECIMAL(1000, 0), $0), $1)])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[$SUM0($1)])",
           "\n    LogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT()])",
-          "\n        LogicalProject(col4=[$0], col2=[$1], col3=[$2])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
+          "\n        LogicalProject(col2=[$1], col3=[$2], col4=[$3])",
           "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n"
@@ -24,8 +24,8 @@
           "\nLogicalProject(avg=[/(CASE(=($1, 0), null:DECIMAL(1000, 0), $0), $1)], sum=[CASE(=($1, 0), null:DECIMAL(1000, 0), $0)], max=[$2])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[$SUM0($1)], max=[MAX($2)])",
           "\n    LogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT()], max=[MAX($0)])",
-          "\n        LogicalProject(col4=[$0], col2=[$1], col3=[$2])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()], max=[MAX($2)])",
+          "\n        LogicalProject(col2=[$1], col3=[$2], col4=[$3])",
           "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n"

diff --git a/pinot-query-planner/src/test/resources/queries/BasicQueryPlans.json b/pinot-query-planner/src/test/resources/queries/BasicQueryPlans.json
@@ -15,7 +15,7 @@
         "sql": "EXPLAIN PLAN FOR SELECT a.col1, a.col3 + a.ts FROM a WHERE a.col3 >= 0 AND a.col2 = 'a'",
         "output": [
           "Execution Plan",
-          "\nLogicalProject(col1=[$3], EXPR$1=[+($2, $4)])",
+          "\nLogicalProject(col1=[$0], EXPR$1=[+($2, $4)])",
           "\n  LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n    LogicalTableScan(table=[[a]])",
           "\n"
@@ -26,7 +26,7 @@
         "sql": "EXPLAIN PLAN FOR SELECT a.col1, a.col3 + a.ts AS colsum FROM a WHERE a.col3 >= 0 AND a.col2 = 'a'",
         "output": [
           "Execution Plan",
-          "\nLogicalProject(col1=[$3], colsum=[+($2, $4)])",
+          "\nLogicalProject(col1=[$0], colsum=[+($2, $4)])",
           "\n  LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n    LogicalTableScan(table=[[a]])",
           "\n"