Skip to content

Commit

Permalink
Merge bitcoin#7934: Improve rolling bloom filter performance and benchmark
Browse files Browse the repository at this point in the history

1953c40 More efficient bitsliced rolling Bloom filter (Pieter Wuille)
aa62b68 Benchmark rolling bloom filter (Pieter Wuille)
  • Loading branch information
laanwj committed May 9, 2016
2 parents fbd8478 + 1953c40 commit f17032f
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 27 deletions.
3 changes: 2 additions & 1 deletion src/Makefile.bench.include
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ bench_bench_bitcoin_SOURCES = \
bench/bench_bitcoin.cpp \
bench/bench.cpp \
bench/bench.h \
bench/Examples.cpp
bench/Examples.cpp \
bench/rollingbloom.cpp

# Preprocessor and compiler flags for the bench_bitcoin binary.
# The libevent flags variable is spelled EVENT_CFLAGS, matching the
# EVENT_PTHREADS_CFLAGS variable next to it; a misspelled make variable
# expands to an empty string without any diagnostic.
# NOTE(review): assumes configure exports EVENT_CFLAGS - confirm in configure.ac.
bench_bench_bitcoin_CPPFLAGS = $(AM_CPPFLAGS) $(BITCOIN_INCLUDES) $(EVENT_CFLAGS) $(EVENT_PTHREADS_CFLAGS) -I$(builddir)/bench/
bench_bench_bitcoin_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS)
Expand Down
43 changes: 43 additions & 0 deletions src/bench/rollingbloom.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) 2016 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <iostream>

#include "bench.h"
#include "bloom.h"
#include "utiltime.h"

/**
 * Benchmark body for CRollingBloomFilter.
 *
 * Each pass of the KeepRunning() loop performs one insert() followed by one
 * contains() on a 32-byte key whose first four bytes are derived from a
 * running 32-bit counter (the remaining 28 bytes stay zero).
 *
 * Once every (120000 + 1) / 2 inserts, the insert is additionally timed on
 * its own with GetTimeMicros() and written to stdout as a
 * "RollingBloom-refresh" line, with the elapsed seconds repeated three times.
 * NOTE(review): this is presumably the insert that makes the filter roll over
 * to a new generation; the rollover condition lives in
 * CRollingBloomFilter::insert - confirm there.
 */
static void RollingBloom(benchmark::State& state)
{
    CRollingBloomFilter filter(120000, 0.000001);
    std::vector<unsigned char> key(32);
    const uint32_t insertsPerRefresh = (120000 + 1) / 2;
    uint32_t sequence = 0;
    uint32_t insertsSinceRefresh = 0;
    uint64_t hits = 0; // running total of contains() results; only accumulated here

    while (state.KeepRunning()) {
        ++sequence;

        // Key to insert: the counter serialized least-significant byte first.
        for (int i = 0; i < 4; ++i) {
            key[i] = static_cast<unsigned char>(sequence >> (8 * i));
        }

        if (insertsSinceRefresh != insertsPerRefresh) {
            filter.insert(key);
        } else {
            // Time this single insert separately and report it on its own line.
            const int64_t started = GetTimeMicros();
            filter.insert(key);
            const int64_t finished = GetTimeMicros();
            const double seconds = (finished - started) * 0.000001;
            std::cout << "RollingBloom-refresh,1," << seconds << "," << seconds << "," << seconds << "\n";
            insertsSinceRefresh = 0;
        }
        ++insertsSinceRefresh;

        // Key to look up: the same counter with its four bytes in reverse order.
        for (int i = 0; i < 4; ++i) {
            key[i] = static_cast<unsigned char>(sequence >> (8 * (3 - i)));
        }
        hits += filter.contains(key);
    }
}

BENCHMARK(RollingBloom);
40 changes: 27 additions & 13 deletions src/bloom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,14 +234,18 @@ CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate)
*/
uint32_t nFilterBits = (uint32_t)ceil(-1.0 * nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs)));
data.clear();
/* We store up to 16 'bits' per data element. */
data.resize((nFilterBits + 15) / 16);
/* For each data element we need to store 2 bits. If both bits are 0, the
* bit is treated as unset. If the bits are (01), (10), or (11), the bit is
* treated as set in generation 1, 2, or 3 respectively.
* These bits are stored in separate integers: position P corresponds to bit
* (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */
data.resize(((nFilterBits + 63) / 64) << 1);
reset();
}

/* Similar to CBloomFilter::Hash */
inline unsigned int CRollingBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const {
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (data.size() * 16);
static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, const std::vector<unsigned char>& vDataToHash) {
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
}

void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
Expand All @@ -252,18 +256,25 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
if (nGeneration == 4) {
nGeneration = 1;
}
uint64_t nGenerationMask1 = -(uint64_t)(nGeneration & 1);
uint64_t nGenerationMask2 = -(uint64_t)(nGeneration >> 1);
/* Wipe old entries that used this generation number. */
for (uint32_t p = 0; p < data.size() * 16; p++) {
if (get(p) == nGeneration) {
put(p, 0);
}
for (uint32_t p = 0; p < data.size(); p += 2) {
uint64_t p1 = data[p], p2 = data[p + 1];
uint64_t mask = (p1 ^ nGenerationMask1) | (p2 ^ nGenerationMask2);
data[p] = p1 & mask;
data[p + 1] = p2 & mask;
}
}
nEntriesThisGeneration++;

for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = Hash(n, vKey);
put(h, nGeneration);
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
uint32_t pos = (h >> 6) % data.size();
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
}
}

Expand All @@ -276,8 +287,11 @@ void CRollingBloomFilter::insert(const uint256& hash)
bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
{
for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = Hash(n, vKey);
if (get(h) == 0) {
uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F;
uint32_t pos = (h >> 6) % data.size();
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
return false;
}
}
Expand All @@ -295,7 +309,7 @@ void CRollingBloomFilter::reset()
nTweak = GetRand(std::numeric_limits<unsigned int>::max());
nEntriesThisGeneration = 0;
nGeneration = 1;
for (std::vector<uint32_t>::iterator it = data.begin(); it != data.end(); it++) {
for (std::vector<uint64_t>::iterator it = data.begin(); it != data.end(); it++) {
*it = 0;
}
}
13 changes: 1 addition & 12 deletions src/bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,9 @@ class CRollingBloomFilter
int nEntriesPerGeneration;
int nEntriesThisGeneration;
int nGeneration;
std::vector<uint32_t> data;
std::vector<uint64_t> data;
unsigned int nTweak;
int nHashFuncs;

unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;

inline int get(uint32_t position) const {
return (data[(position >> 4) % data.size()] >> (2 * (position & 0xF))) & 0x3;
}

inline void put(uint32_t position, uint32_t val) {
uint32_t& cell = data[(position >> 4) % data.size()];
cell = (cell & ~(((uint32_t)3) << (2 * (position & 0xF)))) | (val << (2 * (position & 0xF)));
}
};

#endif // BITCOIN_BLOOM_H
5 changes: 4 additions & 1 deletion src/test/bloom_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,11 +514,14 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
if (i >= 100)
BOOST_CHECK(rb1.contains(data[i-100]));
rb1.insert(data[i]);
BOOST_CHECK(rb1.contains(data[i]));
}

// Insert 999 more random entries:
for (int i = 0; i < 999; i++) {
rb1.insert(RandomData());
std::vector<unsigned char> d = RandomData();
rb1.insert(d);
BOOST_CHECK(rb1.contains(d));
}
// Sanity check to make sure the filter isn't just filling up:
nHits = 0;
Expand Down

0 comments on commit f17032f

Please sign in to comment.