diff --git a/internal/core/src/mmap/ChunkedColumn.h b/internal/core/src/mmap/ChunkedColumn.h
index 93d43bafa6b17..082d8e10e1aa1 100644
--- a/internal/core/src/mmap/ChunkedColumn.h
+++ b/internal/core/src/mmap/ChunkedColumn.h
@@ -157,15 +157,26 @@ class ChunkedColumnBase : public ColumnBase {
 
+    // Maps a global row offset to {chunk index, row offset inside that chunk}.
+    // `offset` must lie in [0, num_rows_); anything else trips AssertInfo.
     std::pair
     GetChunkIDByOffset(int64_t offset) const {
-        int chunk_id = 0;
-        for (auto& chunk : chunks_) {
-            if (offset < chunk->RowNums()) {
-                break;
-            }
-            offset -= chunk->RowNums();
-            chunk_id++;
-        }
-        return {chunk_id, offset};
+        // Reject negative offsets as well: without the lower bound the search
+        // below can return begin(), and the "- 1" would wrap the unsigned
+        // chunk index.
+        AssertInfo(offset >= 0 && offset < num_rows_,
+                   "offset {} is out of range, num_rows: {}",
+                   offset,
+                   num_rows_);
+
+        // NOTE(review): assumes num_rows_until_chunk_ is ascending and starts
+        // at 0 (rows preceding each chunk) -- confirm where it is populated.
+        // The first entry greater than `offset` sits one past the owning chunk.
+        auto iter = std::upper_bound(num_rows_until_chunk_.begin(),
+                                     num_rows_until_chunk_.end(),
+                                     offset);
+        size_t chunk_idx =
+            std::distance(num_rows_until_chunk_.begin(), iter) - 1;
+        size_t offset_in_chunk = offset - num_rows_until_chunk_[chunk_idx];
+        return {chunk_idx, offset_in_chunk};
     }
 
     int64_t
diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt
index 67d97b83c3fff..56123c7ef06e6 100644
--- a/internal/core/unittest/CMakeLists.txt
+++ b/internal/core/unittest/CMakeLists.txt
@@ -86,6 +86,7 @@ set(MILVUS_TEST_FILES
     test_tracer.cpp
     test_utils.cpp
     test_chunked_segment.cpp
+    test_chunked_column.cpp
     )
 
 if ( INDEX_ENGINE STREQUAL "cardinal" )
diff --git a/internal/core/unittest/test_chunked_column.cpp b/internal/core/unittest/test_chunked_column.cpp
new file mode 100644
index 0000000000000..e17de1163fe28
--- /dev/null
+++ b/internal/core/unittest/test_chunked_column.cpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "common/Chunk.h"
+#include "gtest/gtest.h"
+#include "mmap/ChunkedColumn.h"
+
+namespace milvus {
+
+// Walks every row of a three-chunk column and checks that GetChunkIDByOffset
+// reports the owning chunk and the row's position inside it.
+TEST(test_chunked_column, test_get_chunkid) {
+    ChunkedColumn column;
+    const std::vector<size_t> chunk_row_nums = {10, 20, 30};
+    for (auto row_num : chunk_row_nums) {
+        // NOTE(review): the template argument was missing from the patch text;
+        // FixedWidthChunk is assumed -- confirm against common/Chunk.h.
+        auto chunk =
+            std::make_shared<FixedWidthChunk>(row_num, 1, nullptr, 0, 4, false);
+        column.AddChunk(chunk);
+    }
+
+    int64_t offset = 0;
+    for (size_t i = 0; i < chunk_row_nums.size(); ++i) {
+        for (size_t j = 0; j < chunk_row_nums[i]; ++j) {
+            auto [chunk_id, offset_in_chunk] =
+                column.GetChunkIDByOffset(offset);
+            ASSERT_EQ(chunk_id, i);
+            ASSERT_EQ(offset_in_chunk, j);
+            ++offset;
+        }
+    }
+}
+
+}  // namespace milvus