Skip to content

Commit

Permalink
[Enhancement] Improve the bitmap index cache strategy (#24940)
Browse files Browse the repository at this point in the history
1. Add a config that allows caching only the bitmap index
2. Fix the index page IO metrics bug.

Signed-off-by: kangkaisen <kangkaisen@apache.org>
  • Loading branch information
kangkaisen authored Jun 9, 2023
1 parent 09664f2 commit 174fcad
Show file tree
Hide file tree
Showing 14 changed files with 62 additions and 41 deletions.
2 changes: 2 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ CONF_Int64(index_stream_cache_capacity, "10737418240");
CONF_mString(storage_page_cache_limit, "20%");
// whether to disable page cache feature in storage
CONF_mBool(disable_storage_page_cache, "false");
// whether to enable the bitmap index memory cache
CONF_mBool(enable_bitmap_memory_page_cache, "false");
// whether to disable column pool
CONF_Bool(disable_column_pool, "false");

Expand Down
6 changes: 3 additions & 3 deletions be/src/storage/rowset/bitmap_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ Status BitmapIndexReader::_do_load(const IndexReadOptions& opts, const BitmapInd
return Status::OK();
}

Status BitmapIndexReader::new_iterator(BitmapIndexIterator** iterator) {
Status BitmapIndexReader::new_iterator(BitmapIndexIterator** iterator, const IndexReadOptions& opts) {
std::unique_ptr<IndexedColumnIterator> dict_iter;
std::unique_ptr<IndexedColumnIterator> bitmap_iter;
RETURN_IF_ERROR(_dict_column_reader->new_iterator(&dict_iter));
RETURN_IF_ERROR(_bitmap_column_reader->new_iterator(&bitmap_iter));
RETURN_IF_ERROR(_dict_column_reader->new_iterator(&dict_iter, opts));
RETURN_IF_ERROR(_bitmap_column_reader->new_iterator(&bitmap_iter, opts));
*iterator = new BitmapIndexIterator(this, std::move(dict_iter), std::move(bitmap_iter), _has_null, bitmap_nums());
return Status::OK();
}
Expand Down
2 changes: 1 addition & 1 deletion be/src/storage/rowset/bitmap_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class BitmapIndexReader {

// create a new column iterator. Client should delete returned iterator
// REQUIRES: the index data has been successfully `load()`ed into memory.
Status new_iterator(BitmapIndexIterator** iterator);
Status new_iterator(BitmapIndexIterator** iterator, const IndexReadOptions& opts);

// REQUIRES: the index data has been successfully `load()`ed into memory.
int64_t bitmap_nums() { return _bitmap_column_reader->num_values(); }
Expand Down
3 changes: 2 additions & 1 deletion be/src/storage/rowset/bloom_filter_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ void BloomFilterIndexReader::_reset() {

Status BloomFilterIndexReader::new_iterator(std::unique_ptr<BloomFilterIndexIterator>* iterator) {
std::unique_ptr<IndexedColumnIterator> bf_iter;
RETURN_IF_ERROR(_bloom_filter_reader->new_iterator(&bf_iter));
IndexReadOptions options;
RETURN_IF_ERROR(_bloom_filter_reader->new_iterator(&bf_iter, options));
iterator->reset(new BloomFilterIndexIterator(this, std::move(bf_iter)));
return Status::OK();
}
Expand Down
16 changes: 5 additions & 11 deletions be/src/storage/rowset/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,9 @@ Status ColumnReader::_init(ColumnMetaPB* meta) {
}
}

Status ColumnReader::new_bitmap_index_iterator(BitmapIndexIterator** iterator, bool skip_fill_local_cache) {
RETURN_IF_ERROR(_load_bitmap_index(skip_fill_local_cache));
RETURN_IF_ERROR(_bitmap_index->new_iterator(iterator));
Status ColumnReader::new_bitmap_index_iterator(const IndexReadOptions& options, BitmapIndexIterator** iterator) {
RETURN_IF_ERROR(_load_bitmap_index(options));
RETURN_IF_ERROR(_bitmap_index->new_iterator(iterator, options));
return Status::OK();
}

Expand Down Expand Up @@ -384,17 +384,11 @@ Status ColumnReader::_load_zonemap_index(bool skip_fill_local_cache) {
return Status::OK();
}

Status ColumnReader::_load_bitmap_index(bool skip_fill_local_cache) {
Status ColumnReader::_load_bitmap_index(const IndexReadOptions& options) {
if (_bitmap_index == nullptr || _bitmap_index->loaded()) return Status::OK();
SCOPED_THREAD_LOCAL_CHECK_MEM_LIMIT_SETTER(false);
IndexReadOptions opts;
opts.fs = file_system();
opts.file_name = file_name();
opts.use_page_cache = !config::disable_storage_page_cache;
opts.kept_in_memory = keep_in_memory();
opts.skip_fill_local_cache = skip_fill_local_cache;
auto meta = _bitmap_index_meta.get();
ASSIGN_OR_RETURN(auto first_load, _bitmap_index->load(opts, *meta));
ASSIGN_OR_RETURN(auto first_load, _bitmap_index->load(options, *meta));
if (UNLIKELY(first_load)) {
MEM_TRACKER_SAFE_RELEASE(ExecEnv::GetInstance()->bitmap_index_mem_tracker(),
_bitmap_index_meta->SpaceUsedLong());
Expand Down
4 changes: 2 additions & 2 deletions be/src/storage/rowset/column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class ColumnReader {

// Caller should free returned iterator after unused.
// TODO: StatusOr<std::unique_ptr<ColumnIterator>> new_bitmap_index_iterator()
Status new_bitmap_index_iterator(BitmapIndexIterator** iterator, bool skip_fill_local_cache);
Status new_bitmap_index_iterator(const IndexReadOptions& options, BitmapIndexIterator** iterator);

// Seek to the first entry in the column.
Status seek_to_first(OrdinalPageIndexIterator* iter);
Expand Down Expand Up @@ -173,7 +173,7 @@ class ColumnReader {

Status _load_zonemap_index(bool skip_fill_local_cache);
Status _load_ordinal_index(bool skip_fill_local_cache);
Status _load_bitmap_index(bool skip_fill_local_cache);
Status _load_bitmap_index(const IndexReadOptions& options);
Status _load_bloom_filter_index(bool skip_fill_local_cache);

Status _parse_zone_map(const ZoneMapPB& zm, ZoneMapDetail* detail) const;
Expand Down
22 changes: 14 additions & 8 deletions be/src/storage/rowset/indexed_column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,46 +84,52 @@ Status IndexedColumnReader::load_index_page(RandomAccessFile* read_file, const P
IndexPageReader* reader) {
Slice body;
PageFooterPB footer;
RETURN_IF_ERROR(read_page(read_file, PagePointer(pp), handle, &body, &footer));
RETURN_IF_ERROR(read_page(read_file, PagePointer(pp), handle, &body, &footer, nullptr));
RETURN_IF_ERROR(reader->parse(body, footer.index_page_footer()));
return Status::OK();
}

Status IndexedColumnReader::read_page(RandomAccessFile* read_file, const PagePointer& pp, PageHandle* handle,
Slice* body, PageFooterPB* footer) const {
Slice* body, PageFooterPB* footer, OlapReaderStatistics* stats) const {
PageReadOptions opts;
opts.read_file = read_file;
opts.page_pointer = pp;
opts.codec = _compress_codec;
OlapReaderStatistics tmp_stats;
opts.stats = &tmp_stats;
if (stats != nullptr) {
opts.stats = stats;
}
opts.use_page_cache = _use_page_cache;
opts.kept_in_memory = _kept_in_memory;
opts.encoding_type = _encoding_info->encoding();

return PageIO::read_and_decompress_page(opts, handle, body, footer);
}

Status IndexedColumnReader::new_iterator(std::unique_ptr<IndexedColumnIterator>* iter) {
Status IndexedColumnReader::new_iterator(std::unique_ptr<IndexedColumnIterator>* iter, const IndexReadOptions& opts) {
RandomAccessFileOptions file_opts{.skip_fill_local_cache = _skip_fill_local_cache};
ASSIGN_OR_RETURN(auto file, _fs->new_random_access_file(file_opts, _file_name));
iter->reset(new IndexedColumnIterator(this, std::move(file)));

IndexedColumnIteratorOptions index_opts;
index_opts.read_file = std::move(file);
index_opts.stats = opts.stats;
iter->reset(new IndexedColumnIterator(this, std::move(index_opts)));
return Status::OK();
}

///////////////////////////////////////////////////////////////////////////////
IndexedColumnIterator::IndexedColumnIterator(const IndexedColumnReader* reader,
std::unique_ptr<RandomAccessFile> read_file)
IndexedColumnIterator::IndexedColumnIterator(const IndexedColumnReader* reader, IndexedColumnIteratorOptions opts)
: _reader(reader),
_read_file(std::move(read_file)),
_opts(std::move(opts)),
_ordinal_iter(&reader->_ordinal_index_reader),
_value_iter(&reader->_value_index_reader) {}

Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
PageHandle handle;
Slice body;
PageFooterPB footer;
RETURN_IF_ERROR(_reader->read_page(_read_file.get(), pp, &handle, &body, &footer));
RETURN_IF_ERROR(_reader->read_page(_opts.read_file.get(), pp, &handle, &body, &footer, _opts.stats));
// parse data page
// note that page_index is not used in IndexedColumnIterator, so we pass 0
return parse_page(&_data_page, std::move(handle), body, footer.data_page_footer(), _reader->encoding_info(), pp, 0);
Expand Down
15 changes: 10 additions & 5 deletions be/src/storage/rowset/indexed_column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class TypeInfo;
class EncodingInfo;
class IndexedColumnReader;

// Options handed to an IndexedColumnIterator at construction time.
// Built by IndexedColumnReader::new_iterator(), which moves the freshly opened
// file into `read_file` and copies `stats` from the caller's IndexReadOptions.
struct IndexedColumnIteratorOptions {
// File the iterator reads data pages from; owned by the options object
// (and therefore by the iterator that stores it).
std::unique_ptr<RandomAccessFile> read_file;
// Optional statistics sink forwarded to IndexedColumnReader::read_page().
// May be nullptr, in which case read_page() records into a local temporary
// instead. NOTE(review): presumably not owned and must outlive the iterator
// -- confirm against callers.
OlapReaderStatistics* stats = nullptr;
};

class IndexedColumnIterator {
friend class IndexedColumnReader;

Expand Down Expand Up @@ -86,12 +91,13 @@ class IndexedColumnIterator {
Status next_batch(size_t* n, Column* column);

private:
IndexedColumnIterator(const IndexedColumnReader* reader, std::unique_ptr<RandomAccessFile> read_file);
IndexedColumnIterator(const IndexedColumnReader* reader, IndexedColumnIteratorOptions opts);

Status _read_data_page(const PagePointer& pp);

const IndexedColumnReader* _reader = nullptr;
std::unique_ptr<RandomAccessFile> _read_file;
IndexedColumnIteratorOptions _opts;

// iterator for ordinal index page
IndexPageIterator _ordinal_iter;
// iterator for value index page
Expand All @@ -104,7 +110,6 @@ class IndexedColumnIterator {
std::unique_ptr<ParsedPage> _data_page;
// next_batch() will read from this position
ordinal_t _current_ordinal = 0;
// open file handle
};

// thread-safe reader for IndexedColumn (see comments of `IndexedColumnWriter` to understand what IndexedColumn is)
Expand All @@ -123,7 +128,7 @@ class IndexedColumnReader {

Status load();

Status new_iterator(std::unique_ptr<IndexedColumnIterator>* iter);
Status new_iterator(std::unique_ptr<IndexedColumnIterator>* iter, const IndexReadOptions& opts);

int64_t num_values() const { return _num_values; }
const EncodingInfo* encoding_info() const { return _encoding_info; }
Expand All @@ -143,7 +148,7 @@ class IndexedColumnReader {

// read a page specified by `pp' from `file' into `handle'
Status read_page(RandomAccessFile* read_file, const PagePointer& pp, PageHandle* handle, Slice* body,
PageFooterPB* footer) const;
PageFooterPB* footer, OlapReaderStatistics* stats) const;

FileSystem* _fs;
std::string _file_name;
Expand Down
5 changes: 3 additions & 2 deletions be/src/storage/rowset/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ class PageBuilderOptions {

class IndexReadOptions {
public:
FileSystem* fs = nullptr;
std::string file_name = "";
bool use_page_cache = false;
bool kept_in_memory = false;
// for lake tablet
bool skip_fill_local_cache = false;
std::string file_name = "";
OlapReaderStatistics* stats = nullptr;
FileSystem* fs = nullptr;
};

} // namespace starrocks
4 changes: 2 additions & 2 deletions be/src/storage/rowset/segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,9 +388,9 @@ StatusOr<std::unique_ptr<ColumnIterator>> Segment::new_column_iterator(uint32_t
return _column_readers[cid]->new_iterator(path);
}

Status Segment::new_bitmap_index_iterator(uint32_t cid, BitmapIndexIterator** iter, bool skip_fill_local_cache) {
Status Segment::new_bitmap_index_iterator(uint32_t cid, const IndexReadOptions& options, BitmapIndexIterator** iter) {
if (_column_readers[cid] != nullptr && _column_readers[cid]->has_bitmap_index()) {
return _column_readers[cid]->new_bitmap_index_iterator(iter, skip_fill_local_cache);
return _column_readers[cid]->new_bitmap_index_iterator(options, iter);
}
return Status::OK();
}
Expand Down
3 changes: 2 additions & 1 deletion be/src/storage/rowset/segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class TabletSchema;
class ShortKeyIndexDecoder;

class ChunkIterator;
class IndexReadOptions;
class Schema;
class SegmentIterator;
class SegmentReadOptions;
Expand Down Expand Up @@ -119,7 +120,7 @@ class Segment : public std::enable_shared_from_this<Segment> {
// TODO: remove this method, create `ColumnIterator` via `ColumnReader`.
StatusOr<std::unique_ptr<ColumnIterator>> new_column_iterator(uint32_t cid, ColumnAccessPath* path = nullptr);

Status new_bitmap_index_iterator(uint32_t cid, BitmapIndexIterator** iter, bool skip_fill_local_cache);
Status new_bitmap_index_iterator(uint32_t cid, const IndexReadOptions& options, BitmapIndexIterator** iter);

size_t num_short_keys() const { return _tablet_schema->num_short_key_columns(); }

Expand Down
12 changes: 10 additions & 2 deletions be/src/storage/rowset/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1520,8 +1520,16 @@ Status SegmentIterator::_init_bitmap_index_iterators() {
segment_ptr = _segment;
col_index = cid;
}
RETURN_IF_ERROR(segment_ptr->new_bitmap_index_iterator(col_index, &_bitmap_index_iterators[cid],
_skip_fill_local_cache()));

IndexReadOptions options;
options.fs = segment_ptr->file_system();
options.file_name = segment_ptr->file_name();
options.use_page_cache = config::enable_bitmap_memory_page_cache || !config::disable_storage_page_cache;
options.kept_in_memory = config::enable_bitmap_memory_page_cache;
options.skip_fill_local_cache = _skip_fill_local_cache();
options.stats = _opts.stats;

RETURN_IF_ERROR(segment_ptr->new_bitmap_index_iterator(col_index, options, &_bitmap_index_iterators[cid]));
_has_bitmap_index |= (_bitmap_index_iterators[cid] != nullptr);
}
}
Expand Down
3 changes: 2 additions & 1 deletion be/src/storage/rowset/zone_map_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,8 @@ Status ZoneMapIndexReader::_do_load(const IndexReadOptions& opts, const ZoneMapI
IndexedColumnReader reader(opts, meta.page_zone_maps());
RETURN_IF_ERROR(reader.load());
std::unique_ptr<IndexedColumnIterator> iter;
RETURN_IF_ERROR(reader.new_iterator(&iter));
IndexReadOptions options;
RETURN_IF_ERROR(reader.new_iterator(&iter, options));

_page_zone_maps.resize(reader.num_values());

Expand Down
6 changes: 4 additions & 2 deletions be/test/storage/rowset/bitmap_index_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ class BitmapIndexTest : public testing::Test {
*reader = new BitmapIndexReader();
ASSIGN_OR_ABORT(auto r, (*reader)->load(_opts, meta.bitmap_index()));
ASSERT_TRUE(r);
ASSERT_OK((*reader)->new_iterator(iter));
IndexReadOptions options;
ASSERT_OK((*reader)->new_iterator(iter, options));
}

template <LogicalType type>
Expand Down Expand Up @@ -283,7 +284,8 @@ TEST_F(BitmapIndexTest, test_concurrent_load) {
ASSERT_EQ(1, loads.load());

BitmapIndexIterator* iter = nullptr;
ASSERT_OK(reader->new_iterator(&iter));
IndexReadOptions options;
ASSERT_OK(reader->new_iterator(&iter, options));

Roaring bitmap;
iter->read_null_bitmap(&bitmap);
Expand Down

0 comments on commit 174fcad

Please sign in to comment.