Skip to content

Commit

Permalink
[Enhancement] Add profile for parquet page index (StarRocks#38618)
Browse files (browse the repository at this point in the history)
Signed-off-by: zombee0 <ewang2027@gmail.com>
  • Loading branch information
zombee0 authored Jan 8, 2024
1 parent 9f9ba18 commit 86e3b10
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 0 deletions.
3 changes: 3 additions & 0 deletions be/src/exec/hdfs_scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ struct HdfsScanStats {
bool has_page_statistics = false;
// page skip
int64_t page_skip = 0;
// page index
int64_t rows_before_page_index = 0;
int64_t page_index_ns = 0;

// late materialize round-by-round
int64_t group_min_round_cost = 0;
Expand Down
7 changes: 7 additions & 0 deletions be/src/exec/hdfs_scanner_parquet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ void HdfsParquetScanner::do_update_counter(HdfsScanProfile* profile) {
RuntimeProfile::Counter* page_skip = nullptr;
// round-by-round
RuntimeProfile::Counter* group_min_round_cost = nullptr;
// page index
RuntimeProfile::Counter* rows_before_page_index = nullptr;
RuntimeProfile::Counter* page_index_timer = nullptr;

RuntimeProfile* root = profile->runtime_profile;
ADD_COUNTER(root, kParquetProfileSectionPrefix, TUnit::NONE);
Expand Down Expand Up @@ -104,6 +107,8 @@ void HdfsParquetScanner::do_update_counter(HdfsScanProfile* profile) {
group_min_round_cost = root->AddLowWaterMarkCounter(
"GroupMinRound", TUnit::UNIT, RuntimeProfile::Counter::create_strategy(TCounterAggregateType::AVG),
kParquetProfileSectionPrefix);
rows_before_page_index = ADD_CHILD_COUNTER(root, "RowsBeforePageIndex", TUnit::UNIT, kParquetProfileSectionPrefix);
page_index_timer = ADD_CHILD_TIMER(root, "PageIndexTime", kParquetProfileSectionPrefix);

COUNTER_UPDATE(request_bytes_read, _app_stats.request_bytes_read);
COUNTER_UPDATE(request_bytes_read_uncompressed, _app_stats.request_bytes_read_uncompressed);
Expand All @@ -126,6 +131,8 @@ void HdfsParquetScanner::do_update_counter(HdfsScanProfile* profile) {
COUNTER_UPDATE(page_skip, _app_stats.page_skip);
group_min_round_cost->set(_app_stats.group_min_round_cost);
do_update_iceberg_v2_counter(root, kParquetProfileSectionPrefix);
COUNTER_UPDATE(rows_before_page_index, _app_stats.rows_before_page_index);
COUNTER_UPDATE(page_index_timer, _app_stats.page_index_ns);
}

Status HdfsParquetScanner::do_open(RuntimeState* runtime_state) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/formats/parquet/group_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ Status GroupReader::prepare() {
_init_read_chunk();
_range = SparseRange<uint64_t>(_row_group_first_row, _row_group_first_row + _row_group_metadata->num_rows);
if (config::parquet_page_index_enable) {
SCOPED_RAW_TIMER(&_param.stats->page_index_ns);
_param.stats->rows_before_page_index += _row_group_metadata->num_rows;
auto page_index_reader = std::make_unique<PageIndexReader>(this, _param.file, _column_readers,
_row_group_metadata, _param.min_max_conjunct_ctxs);
ASSIGN_OR_RETURN(bool flag, page_index_reader->generate_read_range(_range));
Expand Down

0 comments on commit 86e3b10

Please sign in to comment.