apache · PrachiKhobragade · Feb 5, 2024
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/recommender/io/InputManager.java
@@ -25,6 +25,7 @@
 import com.fasterxml.jackson.databind.JsonNode;
 import com.google.common.annotations.VisibleForTesting;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
@@ -123,7 +124,7 @@ public class InputManager {
   Set<String> _dimNames = null;
   Set<String> _metricNames = null;
   Set<String> _dateTimeNames = null;
-  Set<String> _columnNamesInvertedSortedIndexApplicable = null;
+  List<String> _columnNamesInvertedSortedIndexApplicable = null;
   Map<String, Integer> _colNameToIntMap = null;
   String[] _intToColNameMap = null;
   Map<String, Triple<Double, BrokerRequest, QueryContext>> _parsedQueries = new HashMap<>();
@@ -261,10 +262,12 @@ private void reorderDimsAndBuildMap()
     _colNameToIntMap = new HashMap<>();
 
     // Inverted index and sorted index will be recommended on all types of columns : dimensions, metrics and date time
-    _columnNamesInvertedSortedIndexApplicable = new HashSet<>(_dimNames);
+    _columnNamesInvertedSortedIndexApplicable = new ArrayList<>(_dimNames);
     _columnNamesInvertedSortedIndexApplicable.addAll(_metricNames);
     _columnNamesInvertedSortedIndexApplicable.addAll(_dateTimeNames);
-
+    // _columnNamesInvertedSortedIndexApplicable is a List (not a Set) so that _colNameToIntMap assigns indices to
+    // dimension fields first, followed by metric fields and then date time fields. This ordering is required by the
+    // FixedLenBitset, which only contains dimension fields.
     AtomicInteger counter = new AtomicInteger(0);
     _columnNamesInvertedSortedIndexApplicable.forEach(name -> {
       _intToColNameMap[counter.get()] = name;

diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java b/pinot-controller/src/test/java/org/apache/pinot/controller/recommender/TestConfigEngine.java
@@ -327,6 +327,23 @@ void testPinotTablePartitionRule()
     assertEquals(output.getPartitionConfig().getNumPartitionsOffline(), 4);
   }
 
+  /**
+   * Tests PinotTablePartitionRule on an input with multiple dimension and metric columns, making sure the weights
+   * for dimension fields are set correctly: the recommended partition dimension is expected to be "colA".
+   */
+  @Test
+  void testPinotTablePartitionRule2()
+      throws InvalidInputException, IOException {
+    loadInput("recommenderInput/PinotTablePartitionRuleInput2.json");
+    AbstractRule abstractRule = RulesToExecute.RuleFactory
+        .getRule(RulesToExecute.Rule.PinotTablePartitionRule, _input, _input._overWrittenConfigs);
+    abstractRule.run();
+    ConfigManager output = _input._overWrittenConfigs;
+    LOGGER.debug("{} {} {}", output.getPartitionConfig().getPartitionDimension(),
+        output.getPartitionConfig().getNumPartitionsRealtime(), output.getPartitionConfig().getNumPartitionsOffline());
+    assertEquals(output.getPartitionConfig().getPartitionDimension().toString(), "colA");
+  }
+
   @Test
   void testKafkaPartitionRule()
       throws InvalidInputException, IOException {

diff --git a/pinot-controller/src/test/resources/recommenderInput/PinotTablePartitionRuleInput2.json b/pinot-controller/src/test/resources/recommenderInput/PinotTablePartitionRuleInput2.json
@@ -0,0 +1,68 @@
+{
+  "schema": {
+    "dateTimeFieldSpecs": [
+      {
+        "dataType": "LONG",
+        "format": "1:MILLISECONDS:EPOCH",
+        "granularity": "1:MILLISECONDS",
+        "name": "timestampMillis",
+        "cardinality": 10000
+      }
+    ],
+    "dimensionFieldSpecs": [
+      {
+        "averageLength": 8,
+        "cardinality": 40000,
+        "dataType": "STRING",
+        "name": "colA"
+      },
+      {
+        "averageLength": 16,
+        "cardinality": 200,
+        "dataType": "STRING",
+        "name": "colB"
+      },
+      {
+        "averageLength": 50,
+        "cardinality": 100000,
+        "dataType": "STRING",
+        "name": "colC"
+      },
+      {
+        "averageLength": 25,
+        "cardinality": 5000,
+        "dataType": "INT",
+        "name": "partition"
+      }
+    ],
+    "metricFieldSpecs": [
+      {
+        "cardinality": 5000,
+        "dataType": "LONG",
+        "name": "metricA"
+      },
+      {
+        "cardinality": 5000,
+        "dataType": "LONG",
+        "name": "metricB"
+      }
+    ],
+    "schemaName": "myTable"
+  },
+  "queriesWithWeights":{
+    "select colC, \"partition\" as partitionNum, max(metricA) as maxMetricA,avg(metricA) as avgMetricA, avg(metricB) as avgMetricB from myTable where colA='valA' and timestampMillis > now() - 3600000 and colB='valB' and timestampMillis < now() group by colB,colC,\"partition\" order by max(metricA) desc limit 10000": 1,
+    "select colC, \"partition\" as partitionNum, max(metricA) as maxMetricA,avg(metricA) as avgMetricA from myTable where colA='val1' and timestampMillis > now() - 3600000 and colB='valB' and timestampMillis < now() and maxMetricA > 2000 group by colB,colC,\"partition\" order by max(metricA) desc limit 10000": 1,
+    "select timestampMillis, sum(metricB) from myTable where (colC='A' or colC='B') and (timestampMillis >= 123) group by timestampMillis order by timestampMillis asc": 1,
+    "select colA, \"partition\" as partitionNum from myTable where colA='valA'": 50,
+    "select colA, colB from myTable where colA='valB'": 30
+  },
+  "qps": 400,
+  "tableType": "REALTIME",
+  "latencySLA": 500,
+  "rulesToExecute": {
+    "recommendPinotTablePartition": true
+  },
+  "numMessagesPerSecInKafkaTopic":50000,
+  "overWrittenConfigs": {
+  }
+}