planner: optimize the performance of SELECT count(*)
#37165
Description
Enhancement
mysql > explain analyze select count(*) from github_events;
+------------------------------+---------------+------------+--------------+---------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+---------+------+
| id | estRows | actRows | task | access object | execution info | operator info | memory | disk |
+------------------------------+---------------+------------+--------------+---------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+---------+------+
| HashAgg_92 | 1.00 | 1 | root | | time:3.59s, loops:2, partial_worker:{wall_time:3.586945619s, concurrency:5, task_num:1, tot_wait:17.934327667s, tot_exec:13.429µs, tot_time:17.934350874s, max:3.586886692s, p95:3.586886692s}, final_worker:{wall_time:3.586962598s, concurrency:5, task_num:1, tot_wait:17.934505317s, tot_exec:19.739µs, tot_time:17.934528686s, max:3.586912325s, p95:3.586912325s} | funcs:count(Column#83)->Column#33 | 9.86 KB | N/A |
| └─TableReader_94 | 1.00 | 4 | root | partition:all | time:3.59s, loops:2, cop_task: {num: 6, max: 0s, min: 0s, avg: 0s, p95: 0s, copr_cache_hit_ratio: 0.00} | data:ExchangeSender_93 | N/A | N/A |
| └─ExchangeSender_93 | 1.00 | 4 | mpp[tiflash] | | tiflash_task:{proc max:3.57s, min:3.21s, avg: 3.37s, p80:3.57s, p95:3.57s, iters:4, tasks:4, threads:88} | ExchangeType: PassThrough | N/A | N/A |
| └─HashAgg_8 | 1.00 | 4 | mpp[tiflash] | | tiflash_task:{proc max:3.57s, min:3.21s, avg: 3.37s, p80:3.57s, p95:3.57s, iters:4, tasks:4, threads:4} | funcs:count(1)->Column#83 | N/A | N/A |
| └─TableFullScan_91 | 4897975649.00 | 4907282882 | mpp[tiflash] | table:github_events | tiflash_task:{proc max:3.37s, min:3.07s, avg: 3.21s, p80:3.37s, p95:3.37s, iters:78475, tasks:4, threads:88} | keep order:false, PartitionTableScan:true | N/A | N/A |
+------------------------------+---------------+------------+--------------+---------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+---------+------+
5 rows in set (3.63 sec)
mysql > explain analyze select count(actor_id) from github_events;
+------------------------------+---------------+------------+--------------+---------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------+---------+------+
| id | estRows | actRows | task | access object | execution info | operator info | memory | disk |
+------------------------------+---------------+------------+--------------+---------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------+---------+------+
| HashAgg_23 | 1.00 | 1 | root | | time:2.83s, loops:2, partial_worker:{wall_time:2.830765746s, concurrency:5, task_num:1, tot_wait:14.153378614s, tot_exec:5.102µs, tot_time:14.153392452s, max:2.830691203s, p95:2.830691203s}, final_worker:{wall_time:2.830816995s, concurrency:5, task_num:1, tot_wait:14.153613809s, tot_exec:60.547µs, tot_time:14.153677414s, max:2.830743316s, p95:2.830743316s} | funcs:count(Column#35)->Column#33 | 9.86 KB | N/A |
| └─TableReader_25 | 1.00 | 4 | root | partition:all | time:2.83s, loops:2, cop_task: {num: 5, max: 0s, min: 0s, avg: 0s, p95: 0s, copr_cache_hit_ratio: 0.00} | data:ExchangeSender_24 | N/A | N/A |
| └─ExchangeSender_24 | 1.00 | 4 | mpp[tiflash] | | tiflash_task:{proc max:2.81s, min:2.24s, avg: 2.56s, p80:2.81s, p95:2.81s, iters:4, tasks:4, threads:88} | ExchangeType: PassThrough | N/A | N/A |
| └─HashAgg_8 | 1.00 | 4 | mpp[tiflash] | | tiflash_task:{proc max:2.81s, min:2.24s, avg: 2.56s, p80:2.81s, p95:2.81s, iters:4, tasks:4, threads:4} | funcs:count(gharchive_dev.github_events.actor_id)->Column#35 | N/A | N/A |
| └─TableFullScan_22 | 4897975649.00 | 4907282882 | mpp[tiflash] | table:github_events | tiflash_task:{proc max:2.78s, min:2.22s, avg: 2.53s, p80:2.78s, p95:2.78s, iters:80546, tasks:4, threads:88} | keep order:false, PartitionTableScan:true | N/A | N/A |
+------------------------------+---------------+------------+--------------+---------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------+---------+------+
5 rows in set (2.88 sec)
Now we scan the primary key column in default. We can improve the performance of SELECT count(*)
by scanning the del_tag column rather than the pk column.
BTW, the following case will be improved, too.
mysql> explain analyze select count(*) from (select actor_id from github_events) as a;
+------------------------------+---------------+------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+---------+------+
| id | estRows | actRows | task | access object | execution info | operator info | memory | disk |
+------------------------------+---------------+------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+---------+------+
| HashAgg_93 | 1.00 | 1 | root | | time:3.67s, loops:2, partial_worker:{wall_time:3.671883051s, concurrency:5, task_num:1, tot_wait:18.359093871s, tot_exec:5.013µs, tot_time:18.359105489s, max:3.671827322s, p95:3.671827322s}, final_worker:{wall_time:3.67190697s, concurrency:5, task_num:1, tot_wait:18.359126767s, tot_exec:27.704µs, tot_time:18.359157552s, max:3.671852045s, p95:3.671852045s} | funcs:count(Column#83)->Column#33 | 9.86 KB | N/A |
| └─TableReader_95 | 1.00 | 4 | root | partition:all | time:3.67s, loops:2, cop_task: {num: 7, max: 0s, min: 0s, avg: 0s, p95: 0s, copr_cache_hit_ratio: 0.00} | data:ExchangeSender_94 | N/A | N/A |
| └─ExchangeSender_94 | 1.00 | 4 | mpp[tiflash] | | tiflash_task:{proc max:3.65s, min:3.25s, avg: 3.4s, p80:3.65s, p95:3.65s, iters:4, tasks:4, threads:88} | ExchangeType: PassThrough | N/A | N/A |
| └─HashAgg_9 | 1.00 | 4 | mpp[tiflash] | | tiflash_task:{proc max:3.65s, min:3.25s, avg: 3.4s, p80:3.65s, p95:3.65s, iters:4, tasks:4, threads:4} | funcs:count(1)->Column#83 | N/A | N/A |
| └─TableFullScan_92 | 4898336416.00 | 4907409773 | mpp[tiflash] | table:github_events | tiflash_task:{proc max:3.43s, min:3.12s, avg: 3.23s, p80:3.43s, p95:3.43s, iters:78512, tasks:4, threads:88} | keep order:false, PartitionTableScan:true | N/A | N/A |
+------------------------------+---------------+------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------+---------+------+
Activity