Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add null bitmap concatenater #34914

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions internal/core/src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ set(COMMON_SRC
EasyAssert.cpp
FieldData.cpp
RegexQuery.cpp
NullBitmapConcatenater.cpp
)

add_library(milvus_common SHARED ${COMMON_SRC})
Expand Down
71 changes: 71 additions & 0 deletions internal/core/src/common/NullBitmapConcatenater.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "common/NullBitmapConcatenater.h"
#include <cstdint>
namespace milvus {
void
NullBitmapConcatenater::AddBitmap(uint8_t* bitmap, size_t size) {
bitmaps_.emplace_back(bitmap, size);
}

void
NullBitmapConcatenater::ConcatenateBitmaps() {
size_t total_size = 0;
for (const auto& bitmap : bitmaps_) {
total_size += bitmap.second;
}
concatenated_bitmap_.first = new uint8_t[(total_size + 7) / 8];
concatenated_bitmap_.second = total_size;

size_t offset = 0;
for (const auto& bitmap : bitmaps_) {
if (bitmap.first == nullptr) {
// nullptr means nullable is false or no null value
// set offset to offset + bitmap.second bits are 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0 means the is not null right? null_bitmap_data taken from arrow, 0 means is null here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0 means the is not null right? null_bitmap_data taken from arrow, 0 means is null here.

oh yes, they use validity maps so 0 is invalid. I will fix it later.

offset += bitmap.second;
continue;

Check warning on line 24 in internal/core/src/common/NullBitmapConcatenater.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/NullBitmapConcatenater.cpp#L23-L24

Added lines #L23 - L24 were not covered by tests
}
// copy bitmap.second bits from bitmap.first to concatenated_bitmap_.first
uint8_t dst_byte = offset / 8;
uint8_t dst_offset = offset % 8;
if (dst_offset) {
// example:
// mask: | 1 1 0 0 0 0 0 0 |
// +-----+-------------+
// bitmap ... | | | ...
// +-----+-------------+
// | dst_offset |
// +-------------+-----+-------------+-----+
// concatenated ... | | | | | ...
// +-------------+-----+-------------+-----+
// | dst_byte | dst_byte + 1 |
// always treat the last byte as trailing bits
auto src_byte_cnt = (bitmap.second + 7) / 8 - 1;
auto trailing_bits = bitmap.second % 8 == 0 ? 8 : bitmap.second % 8;
for (size_t i = 0; i < src_byte_cnt; i++, dst_byte++) {
uint8_t data_byte = bitmap.first[i];
concatenated_bitmap_.first[dst_byte] |= data_byte >> dst_offset;
concatenated_bitmap_.first[dst_byte + 1] = data_byte
<< (8 - dst_offset);

Check warning on line 47 in internal/core/src/common/NullBitmapConcatenater.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/NullBitmapConcatenater.cpp#L44-L47

Added lines #L44 - L47 were not covered by tests
}
// process trailing bits
// trailing bits may can be wrapped by remaing bits
auto masked_trailing =
bitmap.first[src_byte_cnt] & (0xff << (8 - trailing_bits));
concatenated_bitmap_.first[dst_byte] |=
masked_trailing >> dst_offset;
if (trailing_bits > 8 - dst_offset) {
concatenated_bitmap_.first[dst_byte + 1] = masked_trailing
<< (8 - dst_offset);
}
} else {
auto src_byte_cnt = (bitmap.second + 7) / 8;
auto trailing_bits = bitmap.second % 8 == 0 ? 8 : bitmap.second % 8;
memcpy(concatenated_bitmap_.first + dst_byte,
bitmap.first,
src_byte_cnt);
concatenated_bitmap_.first[dst_byte + src_byte_cnt - 1] &=
(0xff << (8 - trailing_bits));
}
offset += bitmap.second;
}
}
} // namespace milvus
44 changes: 44 additions & 0 deletions internal/core/src/common/NullBitmapConcatenater.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>
namespace milvus {

class NullBitmapConcatenater {
public:
NullBitmapConcatenater() = default;

void
AddBitmap(uint8_t* bitmap, size_t size);

// Do not call this function multiple times
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make NullBitmapConcatenater a function instead of a class, then?

Copy link
Contributor Author

@sunby sunby Jul 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make NullBitmapConcatenater a function instead of a class, then?

it's ok if we define a function called NullBitmapConcatenater but I prefer to use a class to manage allocated memory.

void
ConcatenateBitmaps();

void
Clear() {
bitmaps_.clear();
if (concatenated_bitmap_.first) {
delete[] concatenated_bitmap_.first;
concatenated_bitmap_.first = nullptr;
}
}

std::pair<uint8_t*, size_t>
GetConcatenatedBitmap() {
return concatenated_bitmap_;
}

~NullBitmapConcatenater() {
if (concatenated_bitmap_.first) {
delete[] concatenated_bitmap_.first;
}
}

private:
std::vector<std::pair<uint8_t*, size_t>> bitmaps_;
std::pair<uint8_t*, size_t> concatenated_bitmap_;
};
} // namespace milvus
1 change: 1 addition & 0 deletions internal/core/unittest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ set(MILVUS_TEST_FILES
test_chunk_vector.cpp
test_mmap_chunk_manager.cpp
test_monitor.cpp
test_null_bitmap_concat.cpp
)

if ( INDEX_ENGINE STREQUAL "cardinal" )
Expand Down
29 changes: 29 additions & 0 deletions internal/core/unittest/test_null_bitmap_concat.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include <gtest/gtest.h>
#include "common/NullBitmapConcatenater.h"

TEST(null_bitmap_concat_test, test_concatenate) {
milvus::NullBitmapConcatenater c;
uint8_t bitmap1[1] = {0b00000001};
size_t bitmap1_size = 8;
c.AddBitmap(bitmap1, bitmap1_size);
c.AddBitmap(bitmap1, bitmap1_size);
c.ConcatenateBitmaps();
auto c_bitmap = c.GetConcatenatedBitmap();
EXPECT_EQ(2 * bitmap1_size, c_bitmap.second);
std::string s1(reinterpret_cast<char*>(c_bitmap.first),
(c_bitmap.second + 7) / 8);
std::string s2 = "\x01\x01";
EXPECT_EQ(s1, s2);

c.Clear();
uint8_t bitmap2[2] = {0b00000001, 0b11111111};
size_t bitmap2_size = 10;
c.AddBitmap(bitmap2, bitmap2_size);
c.AddBitmap(bitmap1, bitmap1_size);
c.ConcatenateBitmaps();
c_bitmap = c.GetConcatenatedBitmap();
EXPECT_EQ(bitmap2_size + bitmap1_size, c_bitmap.second);
s1 = {reinterpret_cast<char*>(c_bitmap.first), (c_bitmap.second + 7) / 8};
s2 = "\x01\xc0\x40"; // 00000001 11000000 01000000
EXPECT_EQ(s1, s2);
}
Loading