Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,18 @@ public class MetadataEqualsHashCodeTest {
public void testEqualsAndHashCode() {
EqualsVerifier.forClass(InstanceZKMetadata.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
.usingGetClass().verify();
EqualsVerifier.forClass(Schema.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS).usingGetClass()
EqualsVerifier.forClass(DimensionFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
.usingGetClass().verify();
EqualsVerifier.forClass(MetricFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
.usingGetClass().verify();
EqualsVerifier.forClass(TimeFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS).usingGetClass()
.verify();
EqualsVerifier.forClass(DateTimeFieldSpec.class).suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS)
.usingGetClass().verify();
// NOTE: Suppress Warning.ALL_FIELDS_SHOULD_BE_USED because some fields are derived from other fields, but we cannot
// declare them as transitive because they are still needed after ser/de
EqualsVerifier.forClass(Schema.class)
.suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.ALL_FIELDS_SHOULD_BE_USED).usingGetClass()
.verify();
EqualsVerifier.forClass(DimensionFieldSpec.class)
.suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
EqualsVerifier.forClass(MetricFieldSpec.class)
.suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
EqualsVerifier.forClass(TimeFieldSpec.class)
.suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
EqualsVerifier.forClass(DateTimeFieldSpec.class)
.suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS, Warning.TRANSIENT_FIELDS).usingGetClass().verify();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@
package org.apache.pinot.controller.utils;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
import org.apache.pinot.segment.spi.ColumnMetadata;
Expand Down Expand Up @@ -87,7 +86,7 @@ public static SegmentMetadata mockSegmentMetadataWithPartitionInfo(String tableN
when(segmentMetadata.getName()).thenReturn(segmentName);
when(segmentMetadata.getCrc()).thenReturn("0");

Map<String, ColumnMetadata> columnMetadataMap = new HashMap<>();
TreeMap<String, ColumnMetadata> columnMetadataMap = new TreeMap<>();
columnMetadataMap.put(columnName, columnMetadata);
when(segmentMetadata.getColumnMetadataMap()).thenReturn(columnMetadataMap);
return segmentMetadata;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,71 +19,49 @@
package org.apache.pinot.core.util;

import java.io.File;
import java.net.URL;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
import org.apache.pinot.segment.local.segment.creator.SegmentTestUtils;
import org.apache.pinot.segment.local.segment.creator.impl.SegmentCreationDriverFactory;
import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
import org.apache.pinot.segment.local.segment.index.converter.SegmentV1V2ToV3FormatConverter;
import org.apache.pinot.segment.local.utils.CrcUtils;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.SegmentMetadata;
import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
import org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver;
import org.apache.pinot.spi.utils.ReadMode;
import org.apache.pinot.util.TestUtils;
import org.testng.Assert;
import org.testng.annotations.Test;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;

/**
* Dec 4, 2014
*/

public class CrcUtilsTest {

private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "CrcUtilsTest");
private static final String AVRO_DATA = "data/test_data-mv.avro";
private static final File INDEX_DIR = new File("/tmp/testingCrc");
private static final long EXPECTED_V1_CRC = 4139425029L;
private static final long EXPECTED_V3_CRC = 94877217L;

@Test
public void test1()
throws Exception {
if (INDEX_DIR.exists()) {
FileUtils.deleteQuietly(INDEX_DIR);
}

final CrcUtils u1 = CrcUtils.forAllFilesInFolder(new File(makeSegmentAndReturnPath()));
final long crc1 = u1.computeCrc();
final String md51 = u1.computeMD5();

FileUtils.deleteQuietly(INDEX_DIR);

final CrcUtils u2 = CrcUtils.forAllFilesInFolder(new File(makeSegmentAndReturnPath()));
final long crc2 = u2.computeCrc();
final String md52 = u2.computeMD5();

Assert.assertEquals(crc1, crc2);
Assert.assertEquals(md51, md52);

FileUtils.deleteQuietly(INDEX_DIR);

final IndexSegment segment = ImmutableSegmentLoader.load(new File(makeSegmentAndReturnPath()), ReadMode.mmap);
final SegmentMetadata m = segment.getSegmentMetadata();

FileUtils.deleteQuietly(INDEX_DIR);
}

private String makeSegmentAndReturnPath()
public void testCrc()
Copy link
Contributor

@61yao 61yao Mar 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this test different column orders give same CRC?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, different column order will give different CRC. This test is used to make sure the order is always the same

throws Exception {
final String filePath = TestUtils.getFileFromResourceUrl(CrcUtils.class.getClassLoader().getResource(AVRO_DATA));

final SegmentGeneratorConfig config = SegmentTestUtils
.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.DAYS,
"testTable");
config.setSegmentNamePostfix("1");
final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
FileUtils.deleteDirectory(INDEX_DIR);

URL resource = getClass().getClassLoader().getResource(AVRO_DATA);
assertNotNull(resource);
String filePath = TestUtils.getFileFromResourceUrl(resource);
SegmentGeneratorConfig config =
SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
TimeUnit.DAYS, "testTable");
SegmentIndexCreationDriver driver = new SegmentIndexCreationDriverImpl();
driver.init(config);
driver.build();

return new File(INDEX_DIR, driver.getSegmentName()).getAbsolutePath();
File indexDir = driver.getOutputDirectory();
assertEquals(CrcUtils.forAllFilesInFolder(indexDir).computeCrc(), EXPECTED_V1_CRC);

new SegmentV1V2ToV3FormatConverter().convert(indexDir);
assertEquals(CrcUtils.forAllFilesInFolder(indexDir).computeCrc(), EXPECTED_V3_CRC);

FileUtils.deleteDirectory(INDEX_DIR);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
private static final String COLUMN_LENGTH_MAP_KEY = "columnLengthMap";
private static final String COLUMN_CARDINALITY_MAP_KEY = "columnCardinalityMap";
private static final String MAX_NUM_MULTI_VALUES_MAP_KEY = "maxNumMultiValuesMap";
// TODO: This might lead to flaky test, as this disk size is not deterministic
// as it depends on the iteration order of a HashSet.
private static final int DISK_SIZE_IN_BYTES = 20797128;
private static final int DISK_SIZE_IN_BYTES = 20797327;
private static final int NUM_ROWS = 115545;

private final List<ServiceStatus.ServiceStatusCallback> _serviceStatusCallbacks =
Expand Down Expand Up @@ -518,12 +516,7 @@ public void testInvertedIndexTriggering()
// download, the original segment dir is deleted and then replaced with the newly downloaded segment, leaving a
// small chance of race condition between getting table size check and replacing the segment dir, i.e. flaky test.
addInvertedIndex();
// The table size gets larger for sure, but it does not necessarily equal to tableSizeWithNewIndex, because the
// order of entries in the index_map file can change when the raw segment adds/deletes indices back and forth.
long tableSizeAfterAddIndex = getTableSize(getTableName());
assertTrue(tableSizeAfterAddIndex > tableSizeAfterReloadSegment,
String.format("Table size: %d should increase after adding inverted index on segment: %s, as compared with %d",
tableSizeAfterAddIndex, segmentName, tableSizeAfterReloadSegment));
assertEquals(getTableSize(getTableName()), tableSizeWithNewIndex);

// Force to download the whole table and use the original table config, so the disk usage should get back to
// initial value.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,11 @@ private org.apache.avro.Schema extractAvroSchemaFromFile(String inputPath) {
}

private void validateSchemas() {
for (String columnName : _pinotSchema.getPhysicalColumnNames()) {
FieldSpec fieldSpec = _pinotSchema.getFieldSpecFor(columnName);
for (FieldSpec fieldSpec : _pinotSchema.getAllFieldSpecs()) {
if (fieldSpec.isVirtualColumn()) {
continue;
}
String columnName = fieldSpec.getName();
org.apache.avro.Schema.Field avroColumnField = _avroSchema.getField(columnName);
if (avroColumnField == null) {
_missingPinotColumn.addMismatchReason(String
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,24 +290,58 @@ private Object[][] provideQueriesWithException() {

@DataProvider(name = "testQueryPlanDataProvider")
private Object[][] provideQueriesWithExplainedPlan() {
//@formatter:off
return new Object[][] {
new Object[]{"EXPLAIN PLAN INCLUDING ALL ATTRIBUTES AS JSON FOR SELECT col1, col3 FROM a", "{\n"
+ " \"rels\": [\n" + " {\n" + " \"id\": \"0\",\n" + " \"relOp\": \"LogicalTableScan\",\n"
+ " \"table\": [\n" + " \"a\"\n" + " ],\n" + " \"inputs\": []\n" + " },\n"
+ " {\n" + " \"id\": \"1\",\n" + " \"relOp\": \"LogicalProject\",\n" + " \"fields\": [\n"
+ " \"col1\",\n" + " \"col3\"\n" + " ],\n" + " \"exprs\": [\n" + " {\n"
+ " \"input\": 3,\n" + " \"name\": \"$3\"\n" + " },\n" + " {\n"
+ " \"input\": 2,\n" + " \"name\": \"$2\"\n" + " }\n" + " ]\n" + " }\n"
+ " ]\n" + "}"},
new Object[]{"EXPLAIN PLAN INCLUDING ALL ATTRIBUTES AS JSON FOR SELECT col1, col3 FROM a",
"{\n"
+ " \"rels\": [\n"
+ " {\n"
+ " \"id\": \"0\",\n"
+ " \"relOp\": \"LogicalTableScan\",\n"
+ " \"table\": [\n"
+ " \"a\"\n"
+ " ],\n"
+ " \"inputs\": []\n"
+ " },\n"
+ " {\n"
+ " \"id\": \"1\",\n"
+ " \"relOp\": \"LogicalProject\",\n"
+ " \"fields\": [\n"
+ " \"col1\",\n"
+ " \"col3\"\n"
+ " ],\n"
+ " \"exprs\": [\n"
+ " {\n"
+ " \"input\": 0,\n"
+ " \"name\": \"$0\"\n"
+ " },\n"
+ " {\n"
+ " \"input\": 2,\n"
+ " \"name\": \"$2\"\n"
+ " }\n"
+ " ]\n"
+ " }\n"
+ " ]\n"
+ "}"},
new Object[]{"EXPLAIN PLAN EXCLUDING ATTRIBUTES AS DOT FOR SELECT col1, COUNT(*) FROM a GROUP BY col1",
"Execution Plan\n" + "digraph {\n" + "\"LogicalExchange\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n"
+ "\"LogicalAggregate\\n\" -> \"LogicalExchange\\n\" [label=\"0\"]\n"
+ "\"LogicalTableScan\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n" + "}\n"},
new Object[]{"EXPLAIN PLAN FOR SELECT a.col1, b.col3 FROM a JOIN b ON a.col1 = b.col1", "Execution Plan\n"
+ "LogicalProject(col1=[$0], col3=[$1])\n" + " LogicalJoin(condition=[=($0, $2)], joinType=[inner])\n"
+ " LogicalExchange(distribution=[hash[0]])\n" + " LogicalProject(col1=[$3])\n"
+ " LogicalTableScan(table=[[a]])\n" + " LogicalExchange(distribution=[hash[1]])\n"
+ " LogicalProject(col3=[$2], col1=[$3])\n" + " LogicalTableScan(table=[[b]])\n"},
"Execution Plan\n"
+ "digraph {\n"
+ "\"LogicalExchange\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n"
+ "\"LogicalAggregate\\n\" -> \"LogicalExchange\\n\" [label=\"0\"]\n"
+ "\"LogicalTableScan\\n\" -> \"LogicalAggregate\\n\" [label=\"0\"]\n"
+ "}\n"},
new Object[]{"EXPLAIN PLAN FOR SELECT a.col1, b.col3 FROM a JOIN b ON a.col1 = b.col1",
"Execution Plan\n"
+ "LogicalProject(col1=[$0], col3=[$2])\n"
+ " LogicalJoin(condition=[=($0, $1)], joinType=[inner])\n"
+ " LogicalExchange(distribution=[hash[0]])\n"
+ " LogicalProject(col1=[$0])\n"
+ " LogicalTableScan(table=[[a]])\n"
+ " LogicalExchange(distribution=[hash[0]])\n"
+ " LogicalProject(col1=[$0], col3=[$2])\n"
+ " LogicalTableScan(table=[[b]])\n"
},
};
//@formatter:on
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"\nLogicalProject(avg=[/(CASE(=($1, 0), null:DECIMAL(1000, 0), $0), $1)])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[$SUM0($1)])",
"\n LogicalExchange(distribution=[hash])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT()])",
"\n LogicalProject(col4=[$0], col2=[$1], col3=[$2])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
"\n LogicalProject(col2=[$1], col3=[$2], col4=[$3])",
"\n LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
"\n LogicalTableScan(table=[[a]])",
"\n"
Expand All @@ -24,8 +24,8 @@
"\nLogicalProject(avg=[/(CASE(=($1, 0), null:DECIMAL(1000, 0), $0), $1)], sum=[CASE(=($1, 0), null:DECIMAL(1000, 0), $0)], max=[$2])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[$SUM0($1)], max=[MAX($2)])",
"\n LogicalExchange(distribution=[hash])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT()], max=[MAX($0)])",
"\n LogicalProject(col4=[$0], col2=[$1], col3=[$2])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()], max=[MAX($2)])",
"\n LogicalProject(col2=[$1], col3=[$2], col4=[$3])",
"\n LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
"\n LogicalTableScan(table=[[a]])",
"\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"sql": "EXPLAIN PLAN FOR SELECT a.col1, a.col3 + a.ts FROM a WHERE a.col3 >= 0 AND a.col2 = 'a'",
"output": [
"Execution Plan",
"\nLogicalProject(col1=[$3], EXPR$1=[+($2, $4)])",
"\nLogicalProject(col1=[$0], EXPR$1=[+($2, $4)])",
"\n LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
"\n LogicalTableScan(table=[[a]])",
"\n"
Expand All @@ -26,7 +26,7 @@
"sql": "EXPLAIN PLAN FOR SELECT a.col1, a.col3 + a.ts AS colsum FROM a WHERE a.col3 >= 0 AND a.col2 = 'a'",
"output": [
"Execution Plan",
"\nLogicalProject(col1=[$3], colsum=[+($2, $4)])",
"\nLogicalProject(col1=[$0], colsum=[+($2, $4)])",
"\n LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
"\n LogicalTableScan(table=[[a]])",
"\n"
Expand Down
Loading