diff --git a/be/src/common/config.h b/be/src/common/config.h index 32ed0ca55d9a4..55780e3c6bd3c 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -277,6 +277,8 @@ CONF_Int64(index_stream_cache_capacity, "10737418240"); CONF_mString(storage_page_cache_limit, "20%"); // whether to disable page cache feature in storage CONF_mBool(disable_storage_page_cache, "false"); +// whether to enable the bitmap index memory cache +CONF_mBool(enable_bitmap_memory_page_cache, "false"); // whether to disable column pool CONF_Bool(disable_column_pool, "false"); diff --git a/be/src/storage/rowset/bitmap_index_reader.cpp b/be/src/storage/rowset/bitmap_index_reader.cpp index 623defd22c57a..27a476210ba47 100644 --- a/be/src/storage/rowset/bitmap_index_reader.cpp +++ b/be/src/storage/rowset/bitmap_index_reader.cpp @@ -88,11 +88,11 @@ Status BitmapIndexReader::_do_load(const IndexReadOptions& opts, const BitmapInd return Status::OK(); } -Status BitmapIndexReader::new_iterator(BitmapIndexIterator** iterator) { +Status BitmapIndexReader::new_iterator(BitmapIndexIterator** iterator, const IndexReadOptions& opts) { std::unique_ptr dict_iter; std::unique_ptr bitmap_iter; - RETURN_IF_ERROR(_dict_column_reader->new_iterator(&dict_iter)); - RETURN_IF_ERROR(_bitmap_column_reader->new_iterator(&bitmap_iter)); + RETURN_IF_ERROR(_dict_column_reader->new_iterator(&dict_iter, opts)); + RETURN_IF_ERROR(_bitmap_column_reader->new_iterator(&bitmap_iter, opts)); *iterator = new BitmapIndexIterator(this, std::move(dict_iter), std::move(bitmap_iter), _has_null, bitmap_nums()); return Status::OK(); } diff --git a/be/src/storage/rowset/bitmap_index_reader.h b/be/src/storage/rowset/bitmap_index_reader.h index d870bbeba6ce5..a158d0e656e40 100644 --- a/be/src/storage/rowset/bitmap_index_reader.h +++ b/be/src/storage/rowset/bitmap_index_reader.h @@ -71,7 +71,7 @@ class BitmapIndexReader { // create a new column iterator. Client should delete returned iterator // REQUIRES: the index data has been successfully `load()`ed into memory. - Status new_iterator(BitmapIndexIterator** iterator); + Status new_iterator(BitmapIndexIterator** iterator, const IndexReadOptions& opts); // REQUIRES: the index data has been successfully `load()`ed into memory. int64_t bitmap_nums() { return _bitmap_column_reader->num_values(); } diff --git a/be/src/storage/rowset/bloom_filter_index_reader.cpp b/be/src/storage/rowset/bloom_filter_index_reader.cpp index 54de744b71a6b..c55820642bc91 100644 --- a/be/src/storage/rowset/bloom_filter_index_reader.cpp +++ b/be/src/storage/rowset/bloom_filter_index_reader.cpp @@ -86,7 +86,8 @@ void BloomFilterIndexReader::_reset() { Status BloomFilterIndexReader::new_iterator(std::unique_ptr* iterator) { std::unique_ptr bf_iter; - RETURN_IF_ERROR(_bloom_filter_reader->new_iterator(&bf_iter)); + IndexReadOptions options; + RETURN_IF_ERROR(_bloom_filter_reader->new_iterator(&bf_iter, options)); iterator->reset(new BloomFilterIndexIterator(this, std::move(bf_iter))); return Status::OK(); } diff --git a/be/src/storage/rowset/column_reader.cpp b/be/src/storage/rowset/column_reader.cpp index d2cda9cada6bf..a1b7da22da228 100644 --- a/be/src/storage/rowset/column_reader.cpp +++ b/be/src/storage/rowset/column_reader.cpp @@ -263,9 +263,9 @@ Status ColumnReader::_init(ColumnMetaPB* meta) { } } -Status ColumnReader::new_bitmap_index_iterator(BitmapIndexIterator** iterator, bool skip_fill_local_cache) { - RETURN_IF_ERROR(_load_bitmap_index(skip_fill_local_cache)); - RETURN_IF_ERROR(_bitmap_index->new_iterator(iterator)); +Status ColumnReader::new_bitmap_index_iterator(const IndexReadOptions& options, BitmapIndexIterator** iterator) { + RETURN_IF_ERROR(_load_bitmap_index(options)); + RETURN_IF_ERROR(_bitmap_index->new_iterator(iterator, options)); return Status::OK(); } @@ -384,17 +384,11 @@ Status ColumnReader::_load_zonemap_index(bool skip_fill_local_cache) { return Status::OK(); } -Status ColumnReader::_load_bitmap_index(bool skip_fill_local_cache) { +Status ColumnReader::_load_bitmap_index(const IndexReadOptions& options) { if (_bitmap_index == nullptr || _bitmap_index->loaded()) return Status::OK(); SCOPED_THREAD_LOCAL_CHECK_MEM_LIMIT_SETTER(false); - IndexReadOptions opts; - opts.fs = file_system(); - opts.file_name = file_name(); - opts.use_page_cache = !config::disable_storage_page_cache; - opts.kept_in_memory = keep_in_memory(); - opts.skip_fill_local_cache = skip_fill_local_cache; auto meta = _bitmap_index_meta.get(); - ASSIGN_OR_RETURN(auto first_load, _bitmap_index->load(opts, *meta)); + ASSIGN_OR_RETURN(auto first_load, _bitmap_index->load(options, *meta)); if (UNLIKELY(first_load)) { MEM_TRACKER_SAFE_RELEASE(ExecEnv::GetInstance()->bitmap_index_mem_tracker(), _bitmap_index_meta->SpaceUsedLong()); diff --git a/be/src/storage/rowset/column_reader.h b/be/src/storage/rowset/column_reader.h index 2844996e868b6..0be64ff92e2eb 100644 --- a/be/src/storage/rowset/column_reader.h +++ b/be/src/storage/rowset/column_reader.h @@ -106,7 +106,7 @@ class ColumnReader { // Caller should free returned iterator after unused. // TODO: StatusOr> new_bitmap_index_iterator() - Status new_bitmap_index_iterator(BitmapIndexIterator** iterator, bool skip_fill_local_cache); + Status new_bitmap_index_iterator(const IndexReadOptions& options, BitmapIndexIterator** iterator); // Seek to the first entry in the column. Status seek_to_first(OrdinalPageIndexIterator* iter); @@ -173,7 +173,7 @@ class ColumnReader { Status _load_zonemap_index(bool skip_fill_local_cache); Status _load_ordinal_index(bool skip_fill_local_cache); - Status _load_bitmap_index(bool skip_fill_local_cache); + Status _load_bitmap_index(const IndexReadOptions& options); Status _load_bloom_filter_index(bool skip_fill_local_cache); Status _parse_zone_map(const ZoneMapPB& zm, ZoneMapDetail* detail) const; diff --git a/be/src/storage/rowset/indexed_column_reader.cpp b/be/src/storage/rowset/indexed_column_reader.cpp index 4a1860f08e570..59b60b8eac2c7 100644 --- a/be/src/storage/rowset/indexed_column_reader.cpp +++ b/be/src/storage/rowset/indexed_column_reader.cpp @@ -84,19 +84,22 @@ Status IndexedColumnReader::load_index_page(RandomAccessFile* read_file, const P IndexPageReader* reader) { Slice body; PageFooterPB footer; - RETURN_IF_ERROR(read_page(read_file, PagePointer(pp), handle, &body, &footer)); + RETURN_IF_ERROR(read_page(read_file, PagePointer(pp), handle, &body, &footer, nullptr)); RETURN_IF_ERROR(reader->parse(body, footer.index_page_footer())); return Status::OK(); } Status IndexedColumnReader::read_page(RandomAccessFile* read_file, const PagePointer& pp, PageHandle* handle, - Slice* body, PageFooterPB* footer) const { + Slice* body, PageFooterPB* footer, OlapReaderStatistics* stats) const { PageReadOptions opts; opts.read_file = read_file; opts.page_pointer = pp; opts.codec = _compress_codec; OlapReaderStatistics tmp_stats; opts.stats = &tmp_stats; + if (stats != nullptr) { + opts.stats = stats; + } opts.use_page_cache = _use_page_cache; opts.kept_in_memory = _kept_in_memory; opts.encoding_type = _encoding_info->encoding(); @@ -104,18 +107,21 @@ Status IndexedColumnReader::read_page(RandomAccessFile* read_file, const PagePoi return PageIO::read_and_decompress_page(opts, handle, body, footer); } -Status IndexedColumnReader::new_iterator(std::unique_ptr* iter) { +Status IndexedColumnReader::new_iterator(std::unique_ptr* iter, const IndexReadOptions& opts) { RandomAccessFileOptions file_opts{.skip_fill_local_cache = _skip_fill_local_cache}; ASSIGN_OR_RETURN(auto file, _fs->new_random_access_file(file_opts, _file_name)); - iter->reset(new IndexedColumnIterator(this, std::move(file))); + + IndexedColumnIteratorOptions index_opts; + index_opts.read_file = std::move(file); + index_opts.stats = opts.stats; + iter->reset(new IndexedColumnIterator(this, std::move(index_opts))); return Status::OK(); } /////////////////////////////////////////////////////////////////////////////// -IndexedColumnIterator::IndexedColumnIterator(const IndexedColumnReader* reader, - std::unique_ptr read_file) +IndexedColumnIterator::IndexedColumnIterator(const IndexedColumnReader* reader, IndexedColumnIteratorOptions opts) : _reader(reader), - _read_file(std::move(read_file)), + _opts(std::move(opts)), _ordinal_iter(&reader->_ordinal_index_reader), _value_iter(&reader->_value_index_reader) {} @@ -123,7 +129,7 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) { PageHandle handle; Slice body; PageFooterPB footer; - RETURN_IF_ERROR(_reader->read_page(_read_file.get(), pp, &handle, &body, &footer)); + RETURN_IF_ERROR(_reader->read_page(_opts.read_file.get(), pp, &handle, &body, &footer, _opts.stats)); // parse data page // note that page_index is not used in IndexedColumnIterator, so we pass 0 return parse_page(&_data_page, std::move(handle), body, footer.data_page_footer(), _reader->encoding_info(), pp, 0); diff --git a/be/src/storage/rowset/indexed_column_reader.h b/be/src/storage/rowset/indexed_column_reader.h index f3af37e68bffd..7dc7207f12361 100644 --- a/be/src/storage/rowset/indexed_column_reader.h +++ b/be/src/storage/rowset/indexed_column_reader.h @@ -57,6 +57,11 @@ class TypeInfo; class EncodingInfo; class IndexedColumnReader; +struct IndexedColumnIteratorOptions { + std::unique_ptr read_file; + OlapReaderStatistics* stats = nullptr; +}; + class IndexedColumnIterator { friend class IndexedColumnReader; @@ -86,12 +91,13 @@ class IndexedColumnIterator { Status next_batch(size_t* n, Column* column); private: - IndexedColumnIterator(const IndexedColumnReader* reader, std::unique_ptr read_file); + IndexedColumnIterator(const IndexedColumnReader* reader, IndexedColumnIteratorOptions opts); Status _read_data_page(const PagePointer& pp); const IndexedColumnReader* _reader = nullptr; - std::unique_ptr _read_file; + IndexedColumnIteratorOptions _opts; + // iterator for ordinal index page IndexPageIterator _ordinal_iter; // iterator for value index page @@ -104,7 +110,6 @@ class IndexedColumnIterator { std::unique_ptr _data_page; // next_batch() will read from this position ordinal_t _current_ordinal = 0; - // open file handle }; // thread-safe reader for IndexedColumn (see comments of `IndexedColumnWriter` to understand what IndexedColumn is) @@ -123,7 +128,7 @@ class IndexedColumnReader { Status load(); - Status new_iterator(std::unique_ptr* iter); + Status new_iterator(std::unique_ptr* iter, const IndexReadOptions& opts); int64_t num_values() const { return _num_values; } const EncodingInfo* encoding_info() const { return _encoding_info; } @@ -143,7 +148,7 @@ class IndexedColumnReader { // read a page specified by `pp' from `file' into `handle' Status read_page(RandomAccessFile* read_file, const PagePointer& pp, PageHandle* handle, Slice* body, - PageFooterPB* footer) const; + PageFooterPB* footer, OlapReaderStatistics* stats) const; FileSystem* _fs; std::string _file_name; diff --git a/be/src/storage/rowset/options.h b/be/src/storage/rowset/options.h index 995d198df8e02..fb543cf322c25 100644 --- a/be/src/storage/rowset/options.h +++ b/be/src/storage/rowset/options.h @@ -53,12 +53,13 @@ class PageBuilderOptions { class IndexReadOptions { public: - FileSystem* fs = nullptr; - std::string file_name = ""; bool use_page_cache = false; bool kept_in_memory = false; // for lake tablet bool skip_fill_local_cache = false; + std::string file_name = ""; + OlapReaderStatistics* stats = nullptr; + FileSystem* fs = nullptr; }; } // namespace starrocks diff --git a/be/src/storage/rowset/segment.cpp b/be/src/storage/rowset/segment.cpp index 22a4c1c6a3806..904e54d7d3e8b 100644 --- a/be/src/storage/rowset/segment.cpp +++ b/be/src/storage/rowset/segment.cpp @@ -388,9 +388,9 @@ StatusOr> Segment::new_column_iterator(uint32_t return _column_readers[cid]->new_iterator(path); } -Status Segment::new_bitmap_index_iterator(uint32_t cid, BitmapIndexIterator** iter, bool skip_fill_local_cache) { +Status Segment::new_bitmap_index_iterator(uint32_t cid, const IndexReadOptions& options, BitmapIndexIterator** iter) { if (_column_readers[cid] != nullptr && _column_readers[cid]->has_bitmap_index()) { - return _column_readers[cid]->new_bitmap_index_iterator(iter, skip_fill_local_cache); + return _column_readers[cid]->new_bitmap_index_iterator(options, iter); } return Status::OK(); } diff --git a/be/src/storage/rowset/segment.h b/be/src/storage/rowset/segment.h index 4c6b2cf5796fa..7dbc69d2399ef 100644 --- a/be/src/storage/rowset/segment.h +++ b/be/src/storage/rowset/segment.h @@ -59,6 +59,7 @@ class TabletSchema; class ShortKeyIndexDecoder; class ChunkIterator; +class IndexReadOptions; class Schema; class SegmentIterator; class SegmentReadOptions; @@ -119,7 +120,7 @@ class Segment : public std::enable_shared_from_this { // TODO: remove this method, create `ColumnIterator` via `ColumnReader`. StatusOr> new_column_iterator(uint32_t cid, ColumnAccessPath* path = nullptr); - Status new_bitmap_index_iterator(uint32_t cid, BitmapIndexIterator** iter, bool skip_fill_local_cache); + Status new_bitmap_index_iterator(uint32_t cid, const IndexReadOptions& options, BitmapIndexIterator** iter); size_t num_short_keys() const { return _tablet_schema->num_short_key_columns(); } diff --git a/be/src/storage/rowset/segment_iterator.cpp b/be/src/storage/rowset/segment_iterator.cpp index 095bb3b091afc..dd5d275a3fb5a 100644 --- a/be/src/storage/rowset/segment_iterator.cpp +++ b/be/src/storage/rowset/segment_iterator.cpp @@ -1520,8 +1520,16 @@ Status SegmentIterator::_init_bitmap_index_iterators() { segment_ptr = _segment; col_index = cid; } - RETURN_IF_ERROR(segment_ptr->new_bitmap_index_iterator(col_index, &_bitmap_index_iterators[cid], - _skip_fill_local_cache())); + + IndexReadOptions options; + options.fs = segment_ptr->file_system(); + options.file_name = segment_ptr->file_name(); + options.use_page_cache = config::enable_bitmap_memory_page_cache || !config::disable_storage_page_cache; + options.kept_in_memory = config::enable_bitmap_memory_page_cache; + options.skip_fill_local_cache = _skip_fill_local_cache(); + options.stats = _opts.stats; + + RETURN_IF_ERROR(segment_ptr->new_bitmap_index_iterator(col_index, options, &_bitmap_index_iterators[cid])); _has_bitmap_index |= (_bitmap_index_iterators[cid] != nullptr); } } diff --git a/be/src/storage/rowset/zone_map_index.cpp b/be/src/storage/rowset/zone_map_index.cpp index 75284df3f8b8f..01c3c3fc13889 100644 --- a/be/src/storage/rowset/zone_map_index.cpp +++ b/be/src/storage/rowset/zone_map_index.cpp @@ -279,7 +279,8 @@ Status ZoneMapIndexReader::_do_load(const IndexReadOptions& opts, const ZoneMapI IndexedColumnReader reader(opts, meta.page_zone_maps()); RETURN_IF_ERROR(reader.load()); std::unique_ptr iter; - RETURN_IF_ERROR(reader.new_iterator(&iter)); + IndexReadOptions options; + RETURN_IF_ERROR(reader.new_iterator(&iter, options)); _page_zone_maps.resize(reader.num_values()); diff --git a/be/test/storage/rowset/bitmap_index_test.cpp b/be/test/storage/rowset/bitmap_index_test.cpp index e28a0988803a0..d324f5075d68a 100644 --- a/be/test/storage/rowset/bitmap_index_test.cpp +++ b/be/test/storage/rowset/bitmap_index_test.cpp @@ -74,7 +74,8 @@ class BitmapIndexTest : public testing::Test { *reader = new BitmapIndexReader(); ASSIGN_OR_ABORT(auto r, (*reader)->load(_opts, meta.bitmap_index())); ASSERT_TRUE(r); - ASSERT_OK((*reader)->new_iterator(iter)); + IndexReadOptions options; + ASSERT_OK((*reader)->new_iterator(iter, options)); } template @@ -283,7 +284,8 @@ TEST_F(BitmapIndexTest, test_concurrent_load) { ASSERT_EQ(1, loads.load()); BitmapIndexIterator* iter = nullptr; - ASSERT_OK(reader->new_iterator(&iter)); + IndexReadOptions options; + ASSERT_OK(reader->new_iterator(&iter, options)); Roaring bitmap; iter->read_null_bitmap(&bitmap);