Skip to content

Commit

Permalink
enhance: Accelerate find_first by utilizing bitset simd methods
Browse files Browse the repository at this point in the history
Related to milvus-io#39003

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
  • Loading branch information
congqixia committed Jan 6, 2025
1 parent 0a2c964 commit 03dffb4
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 24 deletions.
18 changes: 11 additions & 7 deletions internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1255,20 +1255,24 @@ ChunkedSegmentSealedImpl::find_first(int64_t limit,
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);

// Flip the bitset, since `find_next` searches for bits that are set (true).
auto flipped = bitset.clone();
flipped.flip();

int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}

if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}

return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}

ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl(
Expand Down
24 changes: 14 additions & 10 deletions internal/core/src/segcore/InsertRecord.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,20 +252,24 @@ class OffsetOrderedArray : public OffsetMap {
limit = std::min(limit, cnt);
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
auto it = array_.begin();
for (; hit_num < limit && it != array_.end(); it++) {
auto seg_offset = it->second;
if (seg_offset >= size) {
// Flip the bitset, since `find_first` & `find_next` search for bits that are set (true).
// This could be optimized by supporting a find-false search in the bitset.
auto flipped = bitset.clone();
flipped.flip();

int64_t offset = 0;
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}

if (!bitset[seg_offset]) {
seg_offsets.push_back(seg_offset);
hit_num++;
}
result = flipped.find_next(offset);
}
return {seg_offsets, more_hit_than_limit && it != array_.end()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}

void
Expand Down
19 changes: 12 additions & 7 deletions internal/core/src/segcore/SegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1714,20 +1714,25 @@ SegmentSealedImpl::find_first(int64_t limit, const BitsetType& bitset) const {
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);

// Flip the bitset, since `find_first` & `find_next` search for bits that are set (true).
// This could be optimized by supporting a find-false search in the bitset.
auto flipped = bitset.clone();
flipped.flip();

int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}

if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}

return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}

SegcoreError
Expand Down

0 comments on commit 03dffb4

Please sign in to comment.