-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add null bitmap concatenater #34914
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#include "common/NullBitmapConcatenater.h" | ||
#include <cstdint> | ||
namespace milvus { | ||
void | ||
NullBitmapConcatenater::AddBitmap(uint8_t* bitmap, size_t size) { | ||
bitmaps_.emplace_back(bitmap, size); | ||
} | ||
|
||
void | ||
NullBitmapConcatenater::ConcatenateBitmaps() { | ||
size_t total_size = 0; | ||
for (const auto& bitmap : bitmaps_) { | ||
total_size += bitmap.second; | ||
} | ||
concatenated_bitmap_.first = new uint8_t[(total_size + 7) / 8]; | ||
concatenated_bitmap_.second = total_size; | ||
|
||
size_t offset = 0; | ||
for (const auto& bitmap : bitmaps_) { | ||
if (bitmap.first == nullptr) { | ||
// nullptr means nullable is false or no null value | ||
// set offset to offset + bitmap.second bits are 0 | ||
offset += bitmap.second; | ||
continue; | ||
} | ||
// copy bitmap.second bits from bitmap.first to concatenated_bitmap_.first | ||
uint8_t dst_byte = offset / 8; | ||
uint8_t dst_offset = offset % 8; | ||
if (dst_offset) { | ||
// example: | ||
// mask: | 1 1 0 0 0 0 0 0 | | ||
// +-----+-------------+ | ||
// bitmap ... | | | ... | ||
// +-----+-------------+ | ||
// | dst_offset | | ||
// +-------------+-----+-------------+-----+ | ||
// concatenated ... | | | | | ... | ||
// +-------------+-----+-------------+-----+ | ||
// | dst_byte | dst_byte + 1 | | ||
// always treat the last byte as trailing bits | ||
auto src_byte_cnt = (bitmap.second + 7) / 8 - 1; | ||
auto trailing_bits = bitmap.second % 8 == 0 ? 8 : bitmap.second % 8; | ||
for (size_t i = 0; i < src_byte_cnt; i++, dst_byte++) { | ||
uint8_t data_byte = bitmap.first[i]; | ||
concatenated_bitmap_.first[dst_byte] |= data_byte >> dst_offset; | ||
concatenated_bitmap_.first[dst_byte + 1] = data_byte | ||
<< (8 - dst_offset); | ||
} | ||
// process trailing bits | ||
// trailing bits may can be wrapped by remaing bits | ||
auto masked_trailing = | ||
bitmap.first[src_byte_cnt] & (0xff << (8 - trailing_bits)); | ||
concatenated_bitmap_.first[dst_byte] |= | ||
masked_trailing >> dst_offset; | ||
if (trailing_bits > 8 - dst_offset) { | ||
concatenated_bitmap_.first[dst_byte + 1] = masked_trailing | ||
<< (8 - dst_offset); | ||
} | ||
} else { | ||
auto src_byte_cnt = (bitmap.second + 7) / 8; | ||
auto trailing_bits = bitmap.second % 8 == 0 ? 8 : bitmap.second % 8; | ||
memcpy(concatenated_bitmap_.first + dst_byte, | ||
bitmap.first, | ||
src_byte_cnt); | ||
concatenated_bitmap_.first[dst_byte + src_byte_cnt - 1] &= | ||
(0xff << (8 - trailing_bits)); | ||
} | ||
offset += bitmap.second; | ||
} | ||
} | ||
} // namespace milvus |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#pragma once | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <cstring> | ||
#include <utility> | ||
#include <vector> | ||
namespace milvus { | ||
|
||
class NullBitmapConcatenater { | ||
public: | ||
NullBitmapConcatenater() = default; | ||
|
||
void | ||
AddBitmap(uint8_t* bitmap, size_t size); | ||
|
||
// Do not call this function multiple times | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
it's ok if we define a function called NullBitmapConcatenater but I prefer to use a class to manage allocated memory. |
||
void | ||
ConcatenateBitmaps(); | ||
|
||
void | ||
Clear() { | ||
bitmaps_.clear(); | ||
if (concatenated_bitmap_.first) { | ||
delete[] concatenated_bitmap_.first; | ||
concatenated_bitmap_.first = nullptr; | ||
} | ||
} | ||
|
||
std::pair<uint8_t*, size_t> | ||
GetConcatenatedBitmap() { | ||
return concatenated_bitmap_; | ||
} | ||
|
||
~NullBitmapConcatenater() { | ||
if (concatenated_bitmap_.first) { | ||
delete[] concatenated_bitmap_.first; | ||
} | ||
} | ||
|
||
private: | ||
std::vector<std::pair<uint8_t*, size_t>> bitmaps_; | ||
std::pair<uint8_t*, size_t> concatenated_bitmap_; | ||
}; | ||
} // namespace milvus |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#include <gtest/gtest.h> | ||
#include "common/NullBitmapConcatenater.h" | ||
|
||
TEST(null_bitmap_concat_test, test_concatenate) { | ||
milvus::NullBitmapConcatenater c; | ||
uint8_t bitmap1[1] = {0b00000001}; | ||
size_t bitmap1_size = 8; | ||
c.AddBitmap(bitmap1, bitmap1_size); | ||
c.AddBitmap(bitmap1, bitmap1_size); | ||
c.ConcatenateBitmaps(); | ||
auto c_bitmap = c.GetConcatenatedBitmap(); | ||
EXPECT_EQ(2 * bitmap1_size, c_bitmap.second); | ||
std::string s1(reinterpret_cast<char*>(c_bitmap.first), | ||
(c_bitmap.second + 7) / 8); | ||
std::string s2 = "\x01\x01"; | ||
EXPECT_EQ(s1, s2); | ||
|
||
c.Clear(); | ||
uint8_t bitmap2[2] = {0b00000001, 0b11111111}; | ||
size_t bitmap2_size = 10; | ||
c.AddBitmap(bitmap2, bitmap2_size); | ||
c.AddBitmap(bitmap1, bitmap1_size); | ||
c.ConcatenateBitmaps(); | ||
c_bitmap = c.GetConcatenatedBitmap(); | ||
EXPECT_EQ(bitmap2_size + bitmap1_size, c_bitmap.second); | ||
s1 = {reinterpret_cast<char*>(c_bitmap.first), (c_bitmap.second + 7) / 8}; | ||
s2 = "\x01\xc0\x40"; // 00000001 11000000 01000000 | ||
EXPECT_EQ(s1, s2); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
0 means the is not null right? null_bitmap_data taken from arrow, 0 means is null here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh yes, they use validity maps so 0 is invalid. I will fix it later.