2222#include < folly/Hash.h>
2323
2424#include " velox/common/base/BitUtil.h"
25+ #include " velox/common/base/Exceptions.h"
26+ #include " velox/common/base/IOUtils.h"
27+ #include " velox/type/StringView.h"
2528
2629namespace facebook ::velox {
2730
@@ -31,9 +34,15 @@ namespace facebook::velox {
3134// expected entry, we get ~2% false positives. 'hashInput' determines
3235// if the value added or checked needs to be hashed. If this is false,
3336// we assume that the input is already a 64 bit hash number.
34- template <bool hashInput = true >
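// InputType:
// - When hashInput = true, values are hashed with folly::hasher<InputType>,
//   so InputType must be a type that folly::hasher supports.
// - When hashInput = false, values are used directly as the 64 bit hash, so
//   InputType should be uint64_t.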
40+ template <class InputType = uint64_t , bool hashInput = true >
3541class BloomFilter {
3642 public:
43+ BloomFilter (){};
44+ BloomFilter (std::vector<uint64_t > bits) : bits_(bits){};
45+
3746 // Prepares 'this' for use with an expected 'capacity'
3847 // entries. Drops any prior content.
3948 void reset (int32_t capacity) {
@@ -42,18 +51,61 @@ class BloomFilter {
4251 bits_.resize (std::max<int32_t >(4 , bits::nextPowerOfTwo (capacity) / 4 ));
4352 }
4453
54+ bool isSet () {
55+ return bits_.size () > 0 ;
56+ }
57+
4558 // Adds 'value'.
46- void insert (uint64_t value) {
59+ void insert (InputType value) {
4760 set (bits_.data (),
4861 bits_.size (),
49- hashInput ? folly::hasher<uint64_t >()(value) : value);
62+ hashInput ? folly::hasher<InputType >()(value) : value);
5063 }
5164
52- bool mayContain (uint64_t value) const {
65+ bool mayContain (InputType value) const {
5366 return test (
5467 bits_.data (),
5568 bits_.size (),
56- hashInput ? folly::hasher<uint64_t >()(value) : value);
69+ hashInput ? folly::hasher<InputType>()(value) : value);
70+ }
71+
72+ // Combines the two bloomFilter bits_ using bitwise OR.
73+ void merge (BloomFilter& bloomFilter) {
74+ if (bits_.size () == 0 ) {
75+ bits_ = bloomFilter.bits_ ;
76+ return ;
77+ } else if (bloomFilter.bits_ .size () == 0 ){
78+ VELOX_FAIL (" Input bit length should not be 0" );
79+ }
80+ VELOX_CHECK_EQ (bits_.size (), bloomFilter.bits_ .size ());
81+ for (auto i = 0 ; i < bloomFilter.bits_ .size (); i++) {
82+ bits_[i] |= bloomFilter.bits_ [i];
83+ }
84+ }
85+
86+ uint32_t serializedSize () {
87+ return 4 /* number of bits */
88+ + bits_.size () * 8 ;
89+ }
90+
91+ void serialize (StringView& output) {
92+ char * outputBuffer = const_cast <char *>(output.data ());
93+ common::OutputByteStream stream (outputBuffer);
94+ stream.appendOne ((int32_t )bits_.size ());
95+ for (auto bit : bits_) {
96+ stream.appendOne (bit);
97+ }
98+ }
99+
100+ static void deserialize (StringView& serializedData, BloomFilter& output) {
101+ auto serialized = serializedData.data ();
102+ common::InputByteStream stream (serialized);
103+ auto size = stream.read <int32_t >();
104+ output.bits_ .resize (size);
105+ auto bitsdata = reinterpret_cast <const uint64_t *>(serialized + stream.offset ());
106+ for (auto i = 0 ; i < size; i++) {
107+ output.bits_ [i] = bitsdata[i];
108+ }
57109 }
58110
59111 private:
0 commit comments