Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](index compaction)support compact multi segments in one index #28889

Merged
merged 4 commits into from
Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1006,13 +1006,16 @@ DEFINE_String(inverted_index_query_cache_limit, "10%");

// inverted index
DEFINE_mDouble(inverted_index_ram_buffer_size, "512");
// -1 means this setting is disabled (no limit on buffered docs is applied).
// Normally this should not be changed; it is mainly useful for testing.
DEFINE_mInt32(inverted_index_max_buffered_docs, "-1");
// dict path for chinese analyzer
DEFINE_String(inverted_index_dict_path, "${DORIS_HOME}/dict");
DEFINE_Int32(inverted_index_read_buffer_size, "4096");
// tree depth for bkd index
DEFINE_Int32(max_depth_in_bkd_tree, "32");
// index compaction
DEFINE_Bool(inverted_index_compaction_enable, "false");
DEFINE_mBool(inverted_index_compaction_enable, "false");
// index by RAM directory
DEFINE_mBool(inverted_index_ram_dir_enable, "false");
// use num_broadcast_buffer blocks as buffer to do broadcast
Expand Down
3 changes: 2 additions & 1 deletion be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1043,13 +1043,14 @@ DECLARE_String(inverted_index_query_cache_limit);

// inverted index
DECLARE_mDouble(inverted_index_ram_buffer_size);
DECLARE_mInt32(inverted_index_max_buffered_docs);
// dict path for chinese analyzer
DECLARE_String(inverted_index_dict_path);
DECLARE_Int32(inverted_index_read_buffer_size);
// tree depth for bkd index
DECLARE_Int32(max_depth_in_bkd_tree);
// index compaction
DECLARE_Bool(inverted_index_compaction_enable);
DECLARE_mBool(inverted_index_compaction_enable);
// index by RAM directory
DECLARE_mBool(inverted_index_ram_dir_enable);
// use num_broadcast_buffer blocks as buffer to do broadcast
Expand Down
38 changes: 33 additions & 5 deletions be/src/olap/compaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,34 @@ Status Compaction::do_compaction_impl(int64_t permits) {

if (_input_row_num > 0 && stats.rowid_conversion && config::inverted_index_compaction_enable) {
OlapStopWatch inverted_watch;

// check rowid_conversion correctness
Version version = _tablet->max_version();
DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
std::set<RowLocation> missed_rows;
std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>> location_map;
// Convert the delete bitmap of the input rowsets to output rowset.
std::size_t missed_rows_size = 0;
_tablet->calc_compaction_output_rowset_delete_bitmap(
_input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
&location_map, _tablet->tablet_meta()->delete_bitmap(),
&output_rowset_delete_bitmap);
if (!allow_delete_in_cumu_compaction()) {
missed_rows_size = missed_rows.size();
if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION &&
stats.merged_rows != missed_rows_size) {
std::string err_msg = fmt::format(
"cumulative compaction: the merged rows({}) is not equal to missed "
"rows({}) in rowid conversion, tablet_id: {}, table_id:{}",
stats.merged_rows, missed_rows_size, _tablet->tablet_id(),
_tablet->table_id());
DCHECK(false) << err_msg;
LOG(WARNING) << err_msg;
}
}

RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map));

// translation vec
// <<dest_idx_num, dest_docId>>
// the first level vector: index indicates src segment.
Expand All @@ -425,7 +453,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
// src index files
// format: rowsetId_segmentId
std::vector<std::string> src_index_files(src_segment_num);
for (auto m : src_seg_to_id_map) {
for (const auto& m : src_seg_to_id_map) {
std::pair<RowsetId, uint32_t> p = m.first;
src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second);
}
Expand Down Expand Up @@ -676,11 +704,11 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
// of incremental data later.
// TODO(LiaoXin): check if there are duplicate keys
std::size_t missed_rows_size = 0;
_tablet->calc_compaction_output_rowset_delete_bitmap(
_input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
&location_map, _tablet->tablet_meta()->delete_bitmap(),
&output_rowset_delete_bitmap);
if (!allow_delete_in_cumu_compaction()) {
_tablet->calc_compaction_output_rowset_delete_bitmap(
_input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
&location_map, _tablet->tablet_meta()->delete_bitmap(),
&output_rowset_delete_bitmap);
missed_rows_size = missed_rows.size();
if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION && stats != nullptr &&
stats->merged_rows != missed_rows_size) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Status compact_column(int32_t index_id, int src_segment_num, int dest_segment_nu
dest_index_dirs[i] = DorisCompoundDirectoryFactory::getDirectory(fs, path.c_str(), true);
}

DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
dest_segment_num_rows);

Expand Down
3 changes: 1 addition & 2 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@

namespace doris::segment_v2 {
const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
const int32_t MAX_BUFFER_DOCS = 100000000;
const int32_t MERGE_FACTOR = 100000000;
const int32_t MAX_LEAF_COUNT = 1024;
const float MAXMBSortInHeap = 512.0 * 8;
Expand Down Expand Up @@ -196,8 +195,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
bool close_dir_on_shutdown = true;
index_writer = std::make_unique<lucene::index::IndexWriter>(
_dir.get(), _analyzer.get(), create_index, close_dir_on_shutdown);
index_writer->setMaxBufferedDocs(MAX_BUFFER_DOCS);
index_writer->setRAMBufferSizeMB(config::inverted_index_ram_buffer_size);
_index_writer->setMaxBufferedDocs(config::inverted_index_max_buffered_docs);
index_writer->setMaxFieldLength(MAX_FIELD_LEN);
index_writer->setMergeFactor(MERGE_FACTOR);
index_writer->setUseCompoundFile(false);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds

-- !sql --

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds

-- !sql --

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 8 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 9 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds
2018-02-21T12:00 10 I'm using the builds

-- !sql --

-- !sql --
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 1 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 2 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 3 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 4 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 5 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 6 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds
2018-02-21T12:00 7 I'm using the builds

Loading
Loading