Skip to content

Commit 0d07ac0

Browse files
committed
[optimize](count) optimize pk exact query without reading data
1 parent 4ddef31 commit 0d07ac0

File tree

4 files changed

+351
-0
lines changed

4 files changed

+351
-0
lines changed

be/src/olap/rowset/segment_v2/segment_iterator.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1832,6 +1832,9 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
18321832

18331833
for (auto cid : _first_read_column_ids) {
18341834
auto& column = _current_return_columns[cid];
1835+
if (_need_read_key_data(cid, column, nrows_read)) {
1836+
continue;
1837+
}
18351838
if (_prune_column(cid, column, true, nrows_read)) {
18361839
continue;
18371840
}
@@ -2575,5 +2578,43 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
25752578
}
25762579
}
25772580

2581+
// Decides whether the read of key column `cid` can be skipped for the current batch and,
// if so, fills `column` with `nrows_read` default values in place of the real data.
//
// NOTE: despite the name, a return value of `true` means the key data does NOT need to be
// read: the caller in _read_columns_by_index() `continue`s past the column when this
// returns true.
//
// The read is skipped only when all of the following hold:
//   * the tablet schema's keys type is DUP_KEYS,
//   * the pushed-down aggregation is COUNT_ON_INDEX,
//   * `cid` is a key column,
//   * no predicate in _col_predicates or _col_preds_except_leafnode_of_andnode
//     references `cid`.
//
// @param cid         id of the column about to be read.
// @param column      destination column; appended to only when the function returns true.
// @param nrows_read  number of default rows to append when the read is skipped.
// @return true if `column` was filled with defaults and the read must be skipped,
//         false if the column has to be read normally (`column` is left untouched).
bool SegmentIterator::_need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column,
                                          size_t nrows_read) {
    if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
        return false;
    }

    if (_opts.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) {
        return false;
    }

    if (!_opts.tablet_schema->column(cid).is_key()) {
        return false;
    }

    // If the key is referenced by any predicate, its real values are required to
    // evaluate that predicate, so the data needs to be read. Scan the predicate lists
    // directly and bail out on the first hit instead of materializing a temporary set
    // of column ids on every call.
    for (auto* pred : _col_predicates) {
        if (pred->column_id() == cid) {
            return false;
        }
    }
    for (auto* pred : _col_preds_except_leafnode_of_andnode) {
        if (pred->column_id() == cid) {
            return false;
        }
    }

    if (column->is_nullable()) {
        // A nullable column keeps a null map next to the nested column; both are
        // extended by the same number of rows here.
        auto* nullable_col_ptr = static_cast<vectorized::ColumnNullable*>(column.get());
        nullable_col_ptr->get_null_map_column().insert_many_defaults(nrows_read);
        nullable_col_ptr->get_nested_column_ptr()->insert_many_defaults(nrows_read);
    } else {
        column->insert_many_defaults(nrows_read);
    }

    return true;
}
2618+
25782619
} // namespace segment_v2
25792620
} // namespace doris

be/src/olap/rowset/segment_v2/segment_iterator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,8 @@ class SegmentIterator : public RowwiseIterator {
380380

381381
Status _convert_to_expected_type(const std::vector<ColumnId>& col_ids);
382382

383+
// Returns true when the read of key column `cid` can be skipped; in that case `column`
// has already been filled with `nrows_read` default values. Returns false when the
// column must be read normally. Note the name reads inverted: true means "skip the read".
bool _need_read_key_data(ColumnId cid, vectorized::MutableColumnPtr& column, size_t nrows_read);
384+
383385
class BitmapRangeIterator;
384386
class BackwardBitmapRangeIterator;
385387

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql --
3+
974
4+
5+
-- !sql --
6+
974
7+
8+
-- !sql --
9+
839
10+
11+
-- !sql --
12+
839
13+
14+
-- !sql --
15+
271
16+
17+
-- !sql --
18+
271
19+
20+
-- !sql --
21+
913
22+
23+
-- !sql --
24+
913
25+
26+
-- !sql --
27+
14
28+
29+
-- !sql --
30+
14
31+
32+
-- !sql --
33+
15
34+
35+
-- !sql --
36+
15
37+
38+
-- !sql --
39+
4
40+
41+
-- !sql --
42+
4
43+
44+
-- !sql --
45+
15
46+
47+
-- !sql --
48+
15
49+
50+
-- !sql --
51+
827
52+
53+
-- !sql --
54+
827
55+
56+
-- !sql --
57+
970
58+
59+
-- !sql --
60+
970
61+
62+
-- !sql --
63+
10
64+
65+
-- !sql --
66+
10
67+
68+
-- !sql --
69+
970
70+
71+
-- !sql --
72+
970
73+
74+
-- !sql --
75+
11
76+
77+
-- !sql --
78+
9
79+
80+
-- !sql --
81+
21
82+
83+
-- !sql --
84+
19
85+
86+
-- !sql --
87+
11
88+
89+
-- !sql --
90+
10
91+
92+
-- !sql --
93+
6
94+
95+
-- !sql --
96+
7
97+
98+
-- !sql --
99+
0
100+
101+
-- !sql --
102+
3
103+
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
19+
suite("test_count_on_index_2", "p0"){
20+
def indexTbName1 = "test_count_on_index_2_index"
21+
def indexTbName2 = "test_count_on_index_2_no_index"
22+
def indexTbName3 = "test_count_on_index_2_pk"
23+
24+
sql "DROP TABLE IF EXISTS ${indexTbName1}"
25+
26+
sql """
27+
CREATE TABLE ${indexTbName1} (
28+
`@timestamp` int(11) NULL COMMENT "",
29+
`clientip` varchar(20) NULL COMMENT "",
30+
`request` text NULL COMMENT "",
31+
`status` int(11) NULL COMMENT "",
32+
`size` int(11) NULL COMMENT "",
33+
INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
34+
INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
35+
INDEX status_idx (`status`) USING INVERTED COMMENT '',
36+
INDEX size_idx (`size`) USING INVERTED COMMENT ''
37+
) ENGINE=OLAP
38+
DUPLICATE KEY(`@timestamp`)
39+
COMMENT "OLAP"
40+
DISTRIBUTED BY RANDOM BUCKETS 1
41+
PROPERTIES (
42+
"replication_allocation" = "tag.location.default: 1"
43+
);
44+
"""
45+
46+
sql "DROP TABLE IF EXISTS ${indexTbName2}"
47+
48+
sql """
49+
CREATE TABLE ${indexTbName2} (
50+
`@timestamp` int(11) NULL COMMENT "",
51+
`clientip` varchar(20) NULL COMMENT "",
52+
`request` text NULL COMMENT "",
53+
`status` int(11) NULL COMMENT "",
54+
`size` int(11) NULL COMMENT ""
55+
) ENGINE=OLAP
56+
DUPLICATE KEY(`@timestamp`)
57+
COMMENT "OLAP"
58+
DISTRIBUTED BY RANDOM BUCKETS 1
59+
PROPERTIES (
60+
"replication_allocation" = "tag.location.default: 1"
61+
);
62+
"""
63+
64+
sql "DROP TABLE IF EXISTS ${indexTbName3}"
65+
66+
sql """
67+
CREATE TABLE ${indexTbName3} (
68+
`a` int NULL COMMENT "",
69+
`b` int NULL COMMENT "",
70+
`c` int NULL COMMENT ""
71+
) ENGINE=OLAP
72+
DUPLICATE KEY(`a`, `b`, `c`)
73+
COMMENT "OLAP"
74+
DISTRIBUTED BY RANDOM BUCKETS 1
75+
PROPERTIES (
76+
"replication_allocation" = "tag.location.default: 1"
77+
);
78+
"""
79+
80+
sql """
81+
INSERT INTO ${indexTbName3} VALUES
82+
(1, 1, 1),
83+
(2, 2, 2),
84+
(3, 3, 3),
85+
(4, 4, 4),
86+
(5, 5, 5),
87+
(6, 6, 6),
88+
(7, 7, 7),
89+
(8, 8, 8),
90+
(9, 9, 9),
91+
(10, 10, 10),
92+
(11, 11, 11),
93+
(12, 12, 12),
94+
(13, 13, 13),
95+
(14, 14, 14),
96+
(15, 15, 15),
97+
(16, 16, 16),
98+
(17, 17, 17),
99+
(18, 18, 18),
100+
(19, 19, 19),
101+
(20, 20, 20),
102+
(21, 21, 21),
103+
(22, 22, 22),
104+
(23, 23, 23),
105+
(24, 24, 24),
106+
(25, 25, 25),
107+
(26, 26, 26),
108+
(27, 27, 27),
109+
(28, 28, 28),
110+
(29, 29, 29),
111+
(30, 30, 30);
112+
"""
113+
114+
// Stream-loads `file_name` into `table_name` and validates the load result.
//   table_name            target table of the stream load
//   label                 label prefix; a random UUID is appended to it
//   read_flag             value sent as the 'read_json_by_line' header
//   format_flag           value sent as the 'format' header
//   file_name             data file to load
//   ignore_failure        when true and expected_succ_rows < 0, the result is not checked
//   expected_succ_rows    when >= 0, the exact number of loaded rows to expect
//   load_to_single_tablet accepted but not referenced anywhere in this closure
def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
115+
expected_succ_rows = -1, load_to_single_tablet = 'true' ->
116+
117+
// Load the JSON data.
118+
streamLoad {
119+
table "${table_name}"
120+
121+
// Set the HTTP request header parameters.
122+
set 'label', label + "_" + UUID.randomUUID().toString()
123+
set 'read_json_by_line', read_flag
124+
set 'format', format_flag
125+
file file_name // import json file
126+
time 10000 // limit inflight 10s
127+
if (expected_succ_rows >= 0) {
128+
set 'max_filter_ratio', '1'
129+
}
130+
131+
// If a check callback is declared, the default check conditions are skipped,
132+
// so this callback must verify every condition itself.
133+
check { result, exception, startTime, endTime ->
134+
if (ignore_failure && expected_succ_rows < 0) { return }
135+
if (exception != null) {
136+
throw exception
137+
}
138+
log.info("Stream load result: ${result}".toString())
139+
def json = parseJson(result)
140+
assertEquals("success", json.Status.toLowerCase())
141+
if (expected_succ_rows >= 0) {
142+
assertEquals(json.NumberLoadedRows, expected_succ_rows)
143+
} else {
144+
assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows)
145+
assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
146+
}
147+
}
148+
}
149+
}
150+
151+
try {
152+
load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 'documents-1000.json')
153+
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
154+
155+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453; """
156+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453; """
157+
158+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (request match 'images'); """
159+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (request match 'images'); """
160+
161+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (request match 'images' and request match 'english'); """
162+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (request match 'images' and request match 'english'); """
163+
164+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (request match 'images' or request match 'english'); """
165+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (request match 'images' or request match 'english'); """
166+
167+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0'); """
168+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0'); """
169+
170+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0' or clientip = '252.0.0.0'); """
171+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0' or clientip = '252.0.0.0'); """
172+
173+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0' and request match 'hm'); """
174+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0' and request match 'hm'); """
175+
176+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and clientip in ('247.37.0.0', '252.0.0.0'); """
177+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and clientip in ('247.37.0.0', '252.0.0.0'); """
178+
179+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (status = 200); """
180+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (status = 200); """
181+
182+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (status = 200 or status = 304); """
183+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (status = 200 or status = 304); """
184+
185+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0' and status = 200); """
186+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and (clientip = '247.37.0.0' and status = 200); """
187+
188+
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and status in (200, 304); """
189+
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453 and status in (200, 304); """
190+
191+
qt_sql """ select count() from ${indexTbName3} where (a >= 5 and a <= 15); """
192+
qt_sql """ select count() from ${indexTbName3} where (a > 5 and a < 15); """
193+
qt_sql """ select count() from ${indexTbName3} where (a >= 7 and a <= 27); """
194+
qt_sql """ select count() from ${indexTbName3} where (a > 7 and a < 27); """
195+
qt_sql """ select count() from ${indexTbName3} where (a >= 7 and a <= 27) and (b >= 10 and b <= 20); """
196+
qt_sql """ select count() from ${indexTbName3} where (a >= 7 and a < 27) and (b >= 10 and b < 20); """
197+
qt_sql """ select count() from ${indexTbName3} where (a >= 7 and a <= 27) and (b >= 10 and b < 20) and (c >= 12 and c < 18); """
198+
qt_sql """ select count() from ${indexTbName3} where (a >= 2 and a < 28) and (b >= 5 and b < 20) and (c >= 8 and c < 15); """
199+
qt_sql """ select count() from ${indexTbName3} where (a >= 10 and a < 20) and (b >= 5 and b < 14) and (c >= 16 and c < 25); """
200+
qt_sql """ select count() from ${indexTbName3} where (a >= 10 and a < 20) and (b >= 5 and b < 16) and (c >= 13 and c < 25); """
201+
202+
} finally {
203+
//try_sql("DROP TABLE IF EXISTS ${testTable}")
204+
}
205+
}

0 commit comments

Comments
 (0)