Skip to content

Commit

Permalink
[cherry-pick](inverted index) cherry pick pr on branch doris master t…
Browse files Browse the repository at this point in the history
…o fix inverted index issues (apache#1425)

* [Fix](inverted index) fix add nulls bug for inverted fulltext index (apache#16078)
    We found a problem with the inverted index when parser=english:
    if a column contains nulls when its inverted index is flushed, CLucene can throw an exception.
* [fix](regression test) fix test_array_index.groovy without 'order by' lead to result mismatch (apache#16575)
* [fix](inverted index) fix array type inverted index query error (apache#16582)
* [Fix](inverted index) fix array inverted index error match result when doing schema change add index (apache#16839)
    There is a bug in inverted_index_writer when indexing array values for multiple rows.
    It can produce incorrect match results when an index is added through a schema change.

-----------

commit 1a9eefe
Author: airborne12 <[airborne08@gmail.com](mailto:airborne08@gmail.com)>
Date:   Fri Feb 17 11:50:39 2023 +0800

    [Fix](inverted index) fix array inverted index error match result when doing schema change add index (apache#16839)


commit ad14174
Author: YueW <[45946325+Tanya-W@users.noreply.github.com](mailto:45946325+Tanya-W@users.noreply.github.com)>
Date:   Fri Feb 10 17:57:15 2023 +0800

    [fix](inverted index) fix array type inverted index query error (apache#16582)


commit e682991
Author: YueW <[45946325+Tanya-W@users.noreply.github.com](mailto:45946325+Tanya-W@users.noreply.github.com)>
Date:   Fri Feb 10 08:53:22 2023 +0800

    [fix](regression test) fix test_array_index.groovy without 'order by' lead to result mismatch (apache#16575)


commit 69a3ecf
Author: airborne12 <[airborne08@gmail.com](mailto:airborne08@gmail.com)>
Date:   Thu Jan 19 21:21:44 2023 +0800

    [Fix](inverted index) fix add nulls bug for inverted fulltext index (apache#16078)

---------

Co-authored-by: airborne12 <airborne08@gmail.com>
  • Loading branch information
Tanya-W and airborne12 authored Feb 20, 2023
1 parent 51bad83 commit 7397e95
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 37 deletions.
2 changes: 1 addition & 1 deletion be/src/exec/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ class ColumnValueRange {
condition.__set_condition_op("match_element_ge");
}
condition.condition_values.push_back(
cast_to_string<primitive_type, CppType>(value.second, 0));
cast_to_string<primitive_type, CppType>(value.second, _scale));
if (condition.condition_values.size() != 0) {
filters.push_back(condition);
}
Expand Down
16 changes: 9 additions & 7 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
const int32_t MAX_LEAF_COUNT = 1024;
const float MAXMBSortInHeap = 512.0 * 8;
const int DIMS = 1;
const std::string empty_value;

template <FieldType field_type>
class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
Expand Down Expand Up @@ -195,9 +196,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
}

for (int i = 0; i < count; ++i) {
std::string empty_value;
auto empty_val = lucene::util::Misc::_charToWide(empty_value.c_str());
_field->setValue(empty_val, false);
new_fulltext_field(empty_value.c_str(), 0);
_index_writer->addDocument(_doc);
}
}
Expand Down Expand Up @@ -262,8 +261,6 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {

Status add_array_values(size_t field_size, const CollectionValue* values,
size_t count) override {
auto* item_data_ptr = const_cast<CollectionValue*>(values)->mutable_data();

if constexpr (field_is_slice_type(field_type)) {
if (_field == nullptr) {
LOG(ERROR)
Expand All @@ -272,6 +269,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
return Status::InternalError("could not find field in clucene");
}
for (int i = 0; i < count; ++i) {
auto* item_data_ptr = const_cast<CollectionValue*>(values)->mutable_data();
std::vector<std::string> strings;

for (size_t j = 0; j < values->length(); ++j) {
Expand All @@ -286,11 +284,14 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
new_fulltext_field(value.c_str(), value.length());
_rid++;
_index_writer->addDocument(_doc);
values++;
}
} else if constexpr (field_is_numeric_type(field_type)) {
auto p = reinterpret_cast<const CppType*>(item_data_ptr);
for (int i = 0; i < count; ++i) {
auto* item_data_ptr = const_cast<CollectionValue*>(values)->mutable_data();

for (size_t j = 0; j < values->length(); ++j) {
const CppType* p = reinterpret_cast<const CppType*>(item_data_ptr);
if (values->is_null_at(j)) {
// bkd do not index null values, so we do nothing here.
} else {
Expand All @@ -300,10 +301,11 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
_value_key_coder->full_encode_ascending(p, &new_value);
_bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, _rid);
}
p++;
item_data_ptr = (uint8_t*)item_data_ptr + field_size;
}
_row_ids_seen_for_bkd++;
_rid++;
values++;
}
}
return Status::OK();
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/schema_change.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1261,7 +1261,6 @@ Status SchemaChangeForInvertedIndex::_add_nullable(const std::string& column_nam
auto step = next_run_step();
if (null_map[offset]) {
RETURN_IF_ERROR(_inverted_index_builders[index_writer_sign]->add_nulls(step));
*ptr += field->size() * step;
} else {
if (field->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
DCHECK(field->get_sub_field_count() == 1);
Expand All @@ -1273,6 +1272,7 @@ Status SchemaChangeForInvertedIndex::_add_nullable(const std::string& column_nam
step));
}
}
*ptr += field->size() * step;
offset += step;
} while (offset < num_rows);
} catch (const std::exception& e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,18 @@ suite("smoke_test_array_index", "smoke"){

sql "INSERT INTO $indexTblName VALUES (1, [10,20,30], ['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, [30,40,50], NULL);"
sql "INSERT INTO $indexTblName VALUES (4, [40,50,60], NULL);"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china';"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love';"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north';"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea';"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60 ORDER BY id;"
}
28 changes: 14 additions & 14 deletions regression-test/suites/index/test_array_index.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,18 @@ suite("test_array_index"){

sql "INSERT INTO $indexTblName VALUES (1, [10,20,30], ['i','love','china']), (2, [20,30,40], ['i','love','north korea']), (3, [30,40,50], NULL);"
sql "INSERT INTO $indexTblName VALUES (4, [40,50,60], NULL);"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china';"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love';"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north';"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea';"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea' ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_ge 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_le 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_gt 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_lt 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 10 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 20 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 30 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50 ORDER BY id;"
qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60 ORDER BY id;"
}

0 comments on commit 7397e95

Please sign in to comment.