Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ public static String literalMapToHintString(Map<Pair<Integer, Integer>, RexExpre
e.getValue().getDataType().name(), e.getValue().getValue()));
}
// the three-character token ";:;" separates encoded literals, because a bare ";" can appear inside a literal value
return "{" + StringUtils.join(literalStrings, ";") + "}";
return "{" + StringUtils.join(literalStrings, ";:;") + "}";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is this ;:; used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that some aggregation functions accept a stringified parameter literal that itself uses ; as a separator (for example 'hllConversionThreshold=10;hllLog2m=8'). That conflicts with my use of ; to mark the start of the next literal hint.

So I switched to a three-character delimiter, ;:;, to mark the boundaries between hint literals.

}

public static Map<Integer, Map<Integer, Literal>> hintStringToLiteralMap(String literalString) {
Map<Integer, Map<Integer, Literal>> aggCallToLiteralArgsMap = new HashMap<>();
if (StringUtils.isNotEmpty(literalString) && !"{}".equals(literalString)) {
String[] literalStringArr = literalString.substring(1, literalString.length() - 1).split(";");
String[] literalStringArr = literalString.substring(1, literalString.length() - 1).split(";:;");
for (String literalStr : literalStringArr) {
String[] literalStrParts = literalStr.split("\\|", 4);
int aggIdx = Integer.parseInt(literalStrParts[0]);
Expand Down
103 changes: 95 additions & 8 deletions pinot-query-runtime/src/test/resources/queries/CountDistinct.json
Original file line number Diff line number Diff line change
Expand Up @@ -104,45 +104,132 @@
"sql": "SELECT l.groupingCol, DISTINCTCOUNT(CONCAT(l.val, r.val)) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ groupingCol, DISTINCTCOUNT(val) FROM {tbl1} GROUP BY groupingCol",
"outputs": [["b", 2], ["a", 2]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNT(l.val), DISTINCTCOUNT(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 2, 3], ["a", 2, 3]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNT(CONCAT(l.val, r.val)) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"comments": "table aren't actually partitioned by val thus all segments can produce duplicate results, thus [[6]]",
"sql": "SELECT SEGMENT_PARTITIONED_DISTINCT_COUNT(val) FROM {tbl1}",
"outputs": [[6]]
},
{
"comments": "table aren't actually partitioned by val thus all segments can produce duplicate results, thus [[b, 4], [a, 4]]",
"sql": "SELECT groupingCol, SEGMENT_PARTITIONED_DISTINCT_COUNT(val) FROM {tbl1} GROUP BY groupingCol",
"outputs": [["b", 4], ["a", 4]]
},
{
"sql": "SELECT l.groupingCol, SEGMENT_PARTITIONED_DISTINCT_COUNT(l.val), SEGMENT_PARTITIONED_DISTINCT_COUNT(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 2, 3], ["a", 2, 3]]
},
{
"sql": "SELECT l.groupingCol, SEGMENT_PARTITIONED_DISTINCT_COUNT(CONCAT(l.val, r.val)) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"comments": "table aren't actually partitioned by val thus all segments can produce duplicate results, thus [[b, 4], [a, 4]]",
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ groupingCol, SEGMENT_PARTITIONED_DISTINCT_COUNT(val) FROM {tbl1} GROUP BY groupingCol",
"outputs": [["b", 4], ["a", 4]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, SEGMENT_PARTITIONED_DISTINCT_COUNT(l.val), SEGMENT_PARTITIONED_DISTINCT_COUNT(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 2, 3], ["a", 2, 3]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, SEGMENT_PARTITIONED_DISTINCT_COUNT(CONCAT(l.val, r.val)) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"sql": "SELECT DISTINCTCOUNTHLL(val) FROM {tbl1}",
"outputs": [[3]]
},
{
"sql": "SELECT groupingCol, DISTINCTCOUNTHLL(val) FROM {tbl1} GROUP BY groupingCol",
"sql": "SELECT groupingCol, DISTINCTCOUNTHLL(val, 8) FROM {tbl1} GROUP BY groupingCol",
"outputs": [["b", 2], ["a", 2]]
},
{
"sql": "SELECT l.groupingCol, DISTINCTCOUNTHLL(l.val), DISTINCTCOUNTHLL(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 2, 3], ["a", 2, 3]]
},
{
"sql": "SELECT l.groupingCol, DISTINCTCOUNTHLL(CONCAT(l.val, r.val)) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"sql": "SELECT l.groupingCol, DISTINCTCOUNTHLL(CONCAT(l.val, r.val), 8) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ groupingCol, DISTINCTCOUNT(val) FROM {tbl1} GROUP BY groupingCol",
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ groupingCol, DISTINCTCOUNTHLL(val, 8) FROM {tbl1} GROUP BY groupingCol",
"outputs": [["b", 2], ["a", 2]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNT(l.val), DISTINCTCOUNT(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTHLL(l.val), DISTINCTCOUNTHLL(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 2, 3], ["a", 2, 3]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNT(CONCAT(l.val, r.val)) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTHLL(CONCAT(l.val, r.val), 8) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ groupingCol, DISTINCTCOUNTHLL(val) FROM {tbl1} GROUP BY groupingCol",
"sql": "SELECT DISTINCTCOUNTSMARTHLL(val, 'hllLog2m=2') FROM {tbl1}",
"outputs": [[3]]
},
{
"sql": "SELECT groupingCol, DISTINCTCOUNTSMARTHLL(val, 'hllConversionThreshold=10;hllLog2m=8') FROM {tbl1} GROUP BY groupingCol",
"outputs": [["b", 2], ["a", 2]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTHLL(l.val), DISTINCTCOUNTHLL(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"sql": "SELECT l.groupingCol, DISTINCTCOUNTSMARTHLL(l.val), DISTINCTCOUNTSMARTHLL(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 2, 3], ["a", 2, 3]]
},
{
"sql": "SELECT l.groupingCol, DISTINCTCOUNTSMARTHLL(CONCAT(l.val, r.val), 'threshold=10;hllLog2m=2') FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ groupingCol, DISTINCTCOUNTSMARTHLL(val, 'hllConversionThreshold=10;hllLog2m=8') FROM {tbl1} GROUP BY groupingCol",
"outputs": [["b", 2], ["a", 2]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTSMARTHLL(l.val), DISTINCTCOUNTSMARTHLL(r.val) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 2, 3], ["a", 2, 3]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTHLL(CONCAT(l.val, r.val)) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTSMARTHLL(CONCAT(l.val, r.val), 'threshold=10;hllLog2m=2') FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["b", 6], ["a", 6]]
},
{
"sql": "SELECT DISTINCTCOUNTRAWHLL(val, 2) FROM {tbl1}",
"outputs": [["000000020000000400000460"]]
},
{
"sql": "SELECT groupingCol, DISTINCTCOUNTRAWHLL(val) FROM {tbl1} GROUP BY groupingCol",
"outputs": [["a", "00000008000000ac00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"], ["b", "00000008000000ac00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"]]
},
{
"sql": "SELECT l.groupingCol, DISTINCTCOUNTRAWHLL(l.val, 2), DISTINCTCOUNTRAWHLL(r.val, 2) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["a", "000000020000000400000060", "000000020000000400010060"], ["b", "000000020000000400000420", "000000020000000400000480"]]
},
{
"sql": "SELECT l.groupingCol, DISTINCTCOUNTRAWHLL(CONCAT(l.val, r.val), 2) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["a", "000000020000000400028842"], ["b", "000000020000000400008405"]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ groupingCol, DISTINCTCOUNTRAWHLL(val) FROM {tbl1} GROUP BY groupingCol",
"outputs": [["a", "00000008000000ac00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"], ["b", "00000008000000ac00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTRAWHLL(l.val, 2), DISTINCTCOUNTRAWHLL(r.val, 2) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["a", "000000020000000400000060", "000000020000000400010060"], ["b", "000000020000000400000420", "000000020000000400000480"]]
},
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ l.groupingCol, DISTINCTCOUNTRAWHLL(CONCAT(l.val, r.val), 2) FROM {tbl1} l JOIN {tbl2} r ON l.groupingCol = r.groupingCol GROUP BY l.groupingCol",
"outputs": [["a", "000000020000000400028842"], ["b", "000000020000000400008405"]]
}
]
}
Expand Down
Loading