Skip to content

Commit b647375

Browse files
committed
[optimize](count) optimize pk exact query without reading data
1 parent 1726834 commit b647375

File tree

4 files changed

+284
-0
lines changed

4 files changed

+284
-0
lines changed

be/src/olap/rowset/segment_v2/segment_iterator.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1832,6 +1832,9 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
18321832

18331833
for (auto cid : _first_read_column_ids) {
18341834
auto& column = _current_return_columns[cid];
1835+
if (_need_read_pk_data(cid, column, nrows_read)) {
1836+
continue;
1837+
}
18351838
if (_prune_column(cid, column, true, nrows_read)) {
18361839
continue;
18371840
}
@@ -2575,5 +2578,68 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
25752578
}
25762579
}
25772580

2581+
// Decides whether the read of key column `cid` can be skipped for the current batch.
//
// NOTE: the name is misleading. A `true` result means the data was NOT read: `column` has
// been padded with `nrows_read` default values instead. A `false` result means nothing was
// appended and the column must be read the regular way.
//
// Defaults are substituted only when every condition below holds:
//   1. the tablet schema uses the DUP_KEYS model;
//   2. the pushed-down aggregation is COUNT_ON_INDEX;
//   3. `cid` is a key column;
//   4. in every key range, each key field with a null lower bound and a non-null upper
//      bound is an INT or DATETIMEV2 field whose upper bound is the type's maximum value.
bool SegmentIterator::_need_read_pk_data(ColumnId cid, vectorized::MutableColumnPtr& column,
                                         size_t nrows_read) {
    if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
        return false;
    }

    if (_opts.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) {
        return false;
    }

    if (!_opts.tablet_schema->column(cid).is_key()) {
        return false;
    }

    // Text form of the largest value of a supported key type; empty for every other type.
    auto max_key_string = [](FieldType field_type) {
        std::string text;
        if (field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2) {
            auto max = type_limit<PrimitiveTypeTraits<TYPE_DATETIMEV2>::CppType>::max();
            text.resize(30);
            max.to_string(text.data());
            // resize() zero-fills the 30-byte scratch buffer, so whatever follows the
            // formatted text is '\0' padding. Trim it: a padded string can never compare
            // equal to a key value that holds only the formatted text.
            // NOTE(review): assumes get_value() below yields the plain key text — confirm.
            const auto text_len = text.find('\0');
            if (text_len != std::string::npos) {
                text.resize(text_len);
            }
        } else if (field_type == FieldType::OLAP_FIELD_TYPE_INT) {
            auto max = type_limit<PrimitiveTypeTraits<TYPE_INT>::CppType>::max();
            text = boost::lexical_cast<std::string>(max);
        }
        return text;
    };

    // Inspect the bounds of every key range (condition 4 above).
    for (const auto& key_range : _opts.key_ranges) {
        // NOTE(review): presumably a range can carry a missing bound as a null pointer —
        // confirm against the code that fills key_ranges. Reading the data is the safe
        // fallback, and it avoids dereferencing a null key.
        if (key_range.lower_key == nullptr || key_range.upper_key == nullptr) {
            return false;
        }

        auto lower_tuple = key_range.lower_key->to_tuple();
        auto upper_tuple = key_range.upper_key->to_tuple();
        // `key_idx` is a position inside the key tuple; it is unrelated to the `cid`
        // parameter (the original loop reused that name and shadowed it).
        for (size_t key_idx = 0; key_idx < lower_tuple.size(); key_idx++) {
            // Only fields with a null lower bound and a non-null upper bound are checked.
            if (!lower_tuple.is_null(key_idx) || upper_tuple.is_null(key_idx)) {
                continue;
            }

            const auto* field = key_range.upper_key->column_schema(key_idx);
            const auto pk_max = max_key_string(field->type());
            // Key type without a known maximum: read the data.
            if (pk_max.empty()) {
                return false;
            }

            // Upper bound differs from the type's maximum: read the data.
            const auto& pk_value = upper_tuple.get_value(key_idx);
            if (pk_max != pk_value) {
                return false;
            }
        }
    }

    // Append `nrows_read` default rows in place of the real key data. For a nullable
    // column both the null map and the nested column are extended.
    if (column->is_nullable()) {
        auto* nullable_col_ptr = reinterpret_cast<vectorized::ColumnNullable*>(column.get());
        nullable_col_ptr->get_null_map_column().insert_many_defaults(nrows_read);
        nullable_col_ptr->get_nested_column_ptr()->insert_many_defaults(nrows_read);
    } else {
        column->insert_many_defaults(nrows_read);
    }

    return true;
}
2643+
25782644
} // namespace segment_v2
25792645
} // namespace doris

be/src/olap/rowset/segment_v2/segment_iterator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,8 @@ class SegmentIterator : public RowwiseIterator {
380380

381381
Status _convert_to_expected_type(const std::vector<ColumnId>& col_ids);
382382

383+
// Returns true when the data of key column `cid` does not have to be read; in that case
// `column` has already been extended with `nrows_read` default values. Returns false when
// the column must be read normally. Note that the name suggests the opposite meaning.
bool _need_read_pk_data(ColumnId cid, vectorized::MutableColumnPtr& column, size_t nrows_read);
384+
383385
class BitmapRangeIterator;
384386
class BackwardBitmapRangeIterator;
385387

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql --
3+
974
4+
5+
-- !sql --
6+
974
7+
8+
-- !sql --
9+
839
10+
11+
-- !sql --
12+
839
13+
14+
-- !sql --
15+
271
16+
17+
-- !sql --
18+
271
19+
20+
-- !sql --
21+
913
22+
23+
-- !sql --
24+
913
25+
26+
-- !sql --
27+
14
28+
29+
-- !sql --
30+
14
31+
32+
-- !sql --
33+
15
34+
35+
-- !sql --
36+
15
37+
38+
-- !sql --
39+
4
40+
41+
-- !sql --
42+
4
43+
44+
-- !sql --
45+
15
46+
47+
-- !sql --
48+
15
49+
50+
-- !sql --
51+
827
52+
53+
-- !sql --
54+
827
55+
56+
-- !sql --
57+
970
58+
59+
-- !sql --
60+
970
61+
62+
-- !sql --
63+
10
64+
65+
-- !sql --
66+
10
67+
68+
-- !sql --
69+
970
70+
71+
-- !sql --
72+
970
73+
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
19+
suite("test_count_on_index_2", "p0"){
    // Two tables with the same columns and the same data; only the first one declares
    // inverted indexes. Every count() query below runs against both tables, so the
    // recorded results come in pairs.
    def indexTbName1 = "test_count_on_index_2_index"
    def indexTbName2 = "test_count_on_index_2_no_index"

    // Start from a clean state; nothing drops the tables at the end of the suite.
    sql "DROP TABLE IF EXISTS ${indexTbName1}"

    sql """
      CREATE TABLE ${indexTbName1} (
        `@timestamp` int(11) NULL COMMENT "",
        `clientip` varchar(20) NULL COMMENT "",
        `request` text NULL COMMENT "",
        `status` int(11) NULL COMMENT "",
        `size` int(11) NULL COMMENT "",
        INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
        INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
        INDEX status_idx (`status`) USING INVERTED COMMENT '',
        INDEX size_idx (`size`) USING INVERTED COMMENT ''
      ) ENGINE=OLAP
      DUPLICATE KEY(`@timestamp`)
      COMMENT "OLAP"
      DISTRIBUTED BY RANDOM BUCKETS 1
      PROPERTIES (
        "replication_allocation" = "tag.location.default: 1"
      );
    """

    sql "DROP TABLE IF EXISTS ${indexTbName2}"

    sql """
      CREATE TABLE ${indexTbName2} (
        `@timestamp` int(11) NULL COMMENT "",
        `clientip` varchar(20) NULL COMMENT "",
        `request` text NULL COMMENT "",
        `status` int(11) NULL COMMENT "",
        `size` int(11) NULL COMMENT ""
      ) ENGINE=OLAP
      DUPLICATE KEY(`@timestamp`)
      COMMENT "OLAP"
      DISTRIBUTED BY RANDOM BUCKETS 1
      PROPERTIES (
        "replication_allocation" = "tag.location.default: 1"
      );
    """

    // Stream-loads `file_name` into `table_name` and validates the load result.
    // When `expected_succ_rows` is >= 0 the loaded row count must match it exactly;
    // otherwise the load only has to be non-empty and internally consistent.
    def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
                        expected_succ_rows = -1, load_to_single_tablet = 'true' ->

        streamLoad {
            table "${table_name}"

            // HTTP request header parameters; the label gets a random suffix.
            set 'label', label + "_" + UUID.randomUUID().toString()
            set 'read_json_by_line', read_flag
            set 'format', format_flag
            file file_name // import json file
            time 10000 // limit inflight 10s
            if (expected_succ_rows >= 0) {
                set 'max_filter_ratio', '1'
            }

            // Declaring a check callback replaces the default check, so every
            // condition has to be verified here.
            check { result, exception, startTime, endTime ->
                if (ignore_failure && expected_succ_rows < 0) {
                    return
                }
                if (exception != null) {
                    throw exception
                }
                log.info("Stream load result: ${result}".toString())
                def loadResult = parseJson(result)
                assertEquals("success", loadResult.Status.toLowerCase())
                if (expected_succ_rows < 0) {
                    assertEquals(loadResult.NumberTotalRows, loadResult.NumberLoadedRows + loadResult.NumberUnselectedRows)
                    assertTrue(loadResult.NumberLoadedRows > 0 && loadResult.LoadBytes > 0)
                } else {
                    assertEquals(loadResult.NumberLoadedRows, expected_succ_rows)
                }
            }
        }
    }

    // Load the same file into both tables; the table name doubles as the label prefix.
    def tableNames = [indexTbName1, indexTbName2]
    for (String tableName in tableNames) {
        load_httplogs_data(tableName, tableName, 'true', 'json', 'documents-1000.json')
    }

    // Every query shares this range predicate on the key column.
    def timeWindow = "`@timestamp` >= 893964736 and `@timestamp` <= 893966453"

    // Extra predicates appended after the key range, in the order their results are recorded.
    def extraConditions = [
        "",
        " and (request match 'images')",
        " and (request match 'images' and request match 'english')",
        " and (request match 'images' or request match 'english')",
        " and (clientip = '247.37.0.0')",
        " and (clientip = '247.37.0.0' or clientip = '252.0.0.0')",
        " and (clientip = '247.37.0.0' and request match 'hm')",
        " and clientip in ('247.37.0.0', '252.0.0.0')",
        " and (status = 200)",
        " and (status = 200 or status = 304)",
        " and (clientip = '247.37.0.0' and status = 200)",
        " and status in (200, 304)"
    ]

    // For each predicate, query the indexed table first and the plain table second.
    for (String extraCondition in extraConditions) {
        for (String tableName in tableNames) {
            qt_sql """ select count() from ${tableName} where ${timeWindow}${extraCondition}; """
        }
    }
}

0 commit comments

Comments
 (0)