Skip to content

Commit

Permalink
enhance: Optimize GetChunkIDByOffset and add ut (#37704)
Browse files Browse the repository at this point in the history
Signed-off-by: sunby <sunbingyi1992@gmail.com>
  • Loading branch information
sunby authored Nov 15, 2024
1 parent 5a23c80 commit 65d3c66
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 9 deletions.
21 changes: 12 additions & 9 deletions internal/core/src/mmap/ChunkedColumn.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,18 @@ class ChunkedColumnBase : public ColumnBase {

// Maps a column-wide row offset to the chunk that contains that row.
//
// @param offset  row offset counted from the start of the column; must lie
//                in [0, num_rows_).
// @return {index of the owning chunk, offset of the row inside that chunk}.
//
// AssertInfo fires when `offset` is outside the valid range.
//
// NOTE(review): relies on num_rows_until_chunk_ being a non-decreasing list
// of running row counts whose first entry is 0 (rows preceding chunk 0) —
// confirm against the code that appends chunks.
std::pair<size_t, size_t>
GetChunkIDByOffset(int64_t offset) const {
    AssertInfo(offset >= 0 && offset < num_rows_,
               "offset {} is out of range, num_rows: {}",
               offset,
               num_rows_);

    // The first running count strictly greater than `offset` belongs to the
    // chunk *after* the owning one, so step back by one entry.
    auto iter = std::upper_bound(num_rows_until_chunk_.begin(),
                                 num_rows_until_chunk_.end(),
                                 offset);
    size_t chunk_idx =
        std::distance(num_rows_until_chunk_.begin(), iter) - 1;
    // Subtract the rows held by all preceding chunks to get the local offset.
    size_t offset_in_chunk = offset - num_rows_until_chunk_[chunk_idx];
    return {chunk_idx, offset_in_chunk};
}

int64_t
Expand Down
1 change: 1 addition & 0 deletions internal/core/unittest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ set(MILVUS_TEST_FILES
test_tracer.cpp
test_utils.cpp
test_chunked_segment.cpp
test_chunked_column.cpp
)

if ( INDEX_ENGINE STREQUAL "cardinal" )
Expand Down
36 changes: 36 additions & 0 deletions internal/core/unittest/test_chunked_column.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

#include "common/Chunk.h"
#include "gtest/gtest.h"
#include "mmap/ChunkedColumn.h"
namespace milvus {
// Checks that ChunkedColumn::GetChunkIDByOffset translates every column-wide
// row offset into the expected (chunk index, offset within chunk) pair for a
// column assembled from chunks of 10, 20 and 30 rows.
TEST(test_chunked_column, test_get_chunkid) {
    ChunkedColumn column;
    std::vector<size_t> chunk_row_nums = {10, 20, 30};
    for (auto row_num : chunk_row_nums) {
        // Only the per-chunk row count is exercised by this test; the chunk
        // is created with a null data pointer and zero size.
        // NOTE(review): meaning of the remaining constructor arguments is
        // not visible here — confirm against FixedWidthChunk.
        auto chunk =
            std::make_shared<FixedWidthChunk>(row_num, 1, nullptr, 0, 4, false);
        column.AddChunk(chunk);
    }

    // Walk every row in order; `offset` is the column-wide position while
    // (i, j) is the expected chunk index / in-chunk position. Unsigned
    // indices match the size_t values returned by GetChunkIDByOffset and
    // avoid signed/unsigned comparisons.
    int64_t offset = 0;
    for (size_t i = 0; i < chunk_row_nums.size(); ++i) {
        for (size_t j = 0; j < chunk_row_nums[i]; ++j) {
            auto [chunk_id, offset_in_chunk] =
                column.GetChunkIDByOffset(offset);
            ASSERT_EQ(chunk_id, i);
            ASSERT_EQ(offset_in_chunk, j);
            ++offset;
        }
    }
}
}  // namespace milvus

0 comments on commit 65d3c66

Please sign in to comment.