Skip to content

Commit

Permalink
[bugfix](index) Fix build index limitations (apache#24358)
Browse files Browse the repository at this point in the history
1. skip existed index on column with different id on build index
2. allow build index for CANCELED or FINISHED state
  • Loading branch information
xiaokang authored Sep 14, 2023
1 parent eaa3564 commit 9c6734e
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 14 deletions.
25 changes: 18 additions & 7 deletions be/src/olap/task/index_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ Status IndexBuilder::update_inverted_index_info() {
// just do link files
LOG(INFO) << "begin to update_inverted_index_info, tablet=" << _tablet->tablet_id()
<< ", is_drop_op=" << _is_drop_op;
// index ids that will not be linked
std::set<int32_t> without_index_uids;
for (auto i = 0; i < _input_rowsets.size(); ++i) {
auto input_rowset = _input_rowsets[i];
TabletSchemaSPtr output_rs_tablet_schema = std::make_shared<TabletSchema>();
Expand All @@ -78,12 +80,13 @@ Status IndexBuilder::update_inverted_index_info() {
const TabletIndex* exist_index =
output_rs_tablet_schema->get_inverted_index(column_uid);
if (exist_index && exist_index->index_id() != index.index_id()) {
// maybe there are concurrent drop index request did not obtain the lock,
// so return error, to wait the drop index request finished.
return Status::Error<ErrorCode::INVERTED_INDEX_BUILD_WAITTING>(
LOG(WARNING) << fmt::format(
"column: {} has a exist inverted index, but the index id not equal "
"request's index id, , exist index id: {}, request's index id: {}",
"request's index id, , exist index id: {}, request's index id: {}, "
"remove exist index in new output_rs_tablet_schema",
column_uid, exist_index->index_id(), index.index_id());
without_index_uids.insert(exist_index->index_id());
output_rs_tablet_schema->remove_index(exist_index->index_id());
}
output_rs_tablet_schema->append_index(index);
}
Expand All @@ -105,9 +108,17 @@ Status IndexBuilder::update_inverted_index_info() {
if (!status.ok()) {
return Status::Error<ErrorCode::ROWSET_BUILDER_INIT>(status.to_string());
}
RETURN_IF_ERROR(input_rowset->link_files_to(_tablet->tablet_path(),
output_rs_writer->rowset_id(), 0,
&_alter_index_ids)); // build output rowset

// if without_index_uids is not empty, copy _alter_index_ids to it
// else just use _alter_index_ids to avoid copy
if (!without_index_uids.empty()) {
without_index_uids.insert(_alter_index_ids.begin(), _alter_index_ids.end());
}

// build output rowset
RETURN_IF_ERROR(input_rowset->link_files_to(
_tablet->tablet_path(), output_rs_writer->rowset_id(), 0,
without_index_uids.empty() ? &_alter_index_ids : &without_index_uids));

auto input_rowset_meta = input_rowset->rowset_meta();
RowsetMetaSharedPtr rowset_meta = std::make_shared<RowsetMeta>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2898,8 +2898,8 @@ public boolean hasIndexChangeJobOnPartition(
&& indexChangeJob.getTableId() == tableId
&& indexChangeJob.getPartitionName().equals(partitionName)
&& indexChangeJob.hasSameAlterInvertedIndex(isDrop, alterIndexes)
&& indexChangeJob.getJobState() != IndexChangeJob.JobState.CANCELLED) {
// if JobState is CANCELLED, also allow user to create job again
&& !indexChangeJob.isDone()) {
// if JobState is done (CANCELLED or FINISHED), also allow user to create job again
return true;
}
}
Expand Down
33 changes: 28 additions & 5 deletions regression-test/suites/inverted_index_p0/test_build_index.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,22 @@ suite("test_build_index", "inverted_index"){
return "wait_timeout"
}

def wait_for_last_build_index_on_table_running = { table_name, OpTimeout ->
for(int t = delta_time; t <= OpTimeout; t += delta_time){
alter_res = sql """SHOW BUILD INDEX WHERE TableName = "${table_name}" ORDER BY JobId """

def last_job_state = alter_res[alter_res.size()-1][7];
if (last_job_state == "RUNNING") {
logger.info(table_name + " last index job running, state: " + last_job_state + ", detail: " + alter_res)
return last_job_state;
}
useTime = t
sleep(delta_time)
}
assertTrue(useTime <= OpTimeout, "wait_for_last_build_index_on_table_finish timeout")
return "wait_timeout"
}

def tableName = "hackernews_1m"

sql "DROP TABLE IF EXISTS ${tableName}"
Expand Down Expand Up @@ -138,29 +154,31 @@ suite("test_build_index", "inverted_index"){

sql "sync"

// ADD INDEX
sql """ ALTER TABLE ${tableName} ADD INDEX idx_comment (`comment`) USING INVERTED PROPERTIES("parser" = "english") """

// BUILD INDEX and expect state is RUNNING
sql """ BUILD INDEX idx_comment ON ${tableName} """

sleep(1000)

def state = wait_for_last_build_index_on_table_running(tableName, timeout)
def result = sql """ SHOW BUILD INDEX WHERE TableName = "${tableName}" ORDER BY JobId """
assertEquals(result[result.size()-1][1], tableName)
assertTrue(result[result.size()-1][3].contains("ADD INDEX"))
assertEquals(result[result.size()-1][7], "RUNNING")

// CANCEL BUILD INDEX and expect state is CANCELED
sql """ CANCEL BUILD INDEX ON ${tableName} (${result[result.size()-1][0]}) """
result = sql """ SHOW BUILD INDEX WHERE TableName = "${tableName}" ORDER BY JobId """
assertEquals(result[result.size()-1][1], tableName)
assertTrue(result[result.size()-1][3].contains("ADD INDEX"))
assertEquals(result[result.size()-1][7], "CANCELLED")
assertEquals(result[result.size()-1][8], "user cancelled")


// BUILD INDEX and expect state is FINISHED
sql """ BUILD INDEX idx_comment ON ${tableName}; """
def state = wait_for_last_build_index_on_table_finish(tableName, timeout)
state = wait_for_last_build_index_on_table_finish(tableName, timeout)
assertEquals(state, "FINISHED")

// CANCEL BUILD INDEX in FINISHED state and expect exception
def success = false;
try {
sql """ CANCEL BUILD INDEX ON ${tableName}; """
Expand All @@ -169,4 +187,9 @@ suite("test_build_index", "inverted_index"){
logger.info(" CANCEL BUILD INDEX ON ${tableName} exception: " + ex)
}
assertFalse(success)

// BUILD INDEX again and expect state is FINISHED
sql """ BUILD INDEX idx_comment ON ${tableName}; """
state = wait_for_last_build_index_on_table_finish(tableName, timeout)
assertEquals(state, "FINISHED")
}

0 comments on commit 9c6734e

Please sign in to comment.