Skip to content

Commit 241f76f

Browse files
codablockUdjinM6
authored andcommitted
Collection of minor performance optimizations (#2855)
* Merge bitcoin#13176: Improve CRollingBloomFilter performance: replace modulus with FastMod 9aac9f9 replace modulus with FastMod (Martin Ankerl) Pull request description: Not sure if this is optimization is necessary, but anyway I have some spare time so here it is. This replaces the slow modulo operation with a much faster 64bit multiplication & shift. This works when the hash is uniformly distributed between 0 and 2^32-1. This speeds up the benchmark by a factor of about 1.3: ``` RollingBloom, 5, 1500000, 3.73733, 4.97569e-07, 4.99002e-07, 4.98372e-07 # before RollingBloom, 5, 1500000, 2.86842, 3.81630e-07, 3.83730e-07, 3.82473e-07 # FastMod ``` Be aware that this changes the internal data of the filter, so this should probably not be used for CBloomFilter because of interoperability problems. Tree-SHA512: 04104f3fb09f56c9d14458a6aad919aeb0a5af944e8ee6a31f00e93c753e22004648c1cd65bf36752b6addec528d19fb665c27b955ce1666a85a928e17afa47a * Use unordered_map in CSporkManager In one of my profiling sessions with many InstantSend transactions happening, calls into CSporkManager added up to about 1% of total CPU time. This is easily avoidable by using unordered maps. * Use std::unordered_map instead of std::map in limitedmap * Use unordered_set for CNode::setAskFor * Add serialization support for unordered maps and sets * Use unordered_map for mapArgs and mapMultiArgs * Let limitedmap prune in batches and use unordered_multimap Due to the batched pruning, there is no need to maintain an ordered map of values anymore. Only when nPruneAfterSize, there is a need to create a temporary ordered vector of values to figure out what can be removed. * Instead of using a multimap for mapAskFor, use a vector which we sort on demand CNode::AskFor will now push entries into an initially unordered vector instead of an ordered multimap. Only when we later want to use vecAskFor in SendMessages, we sort the vector. The vector will actually be mostly sorted in most cases as insertion order usually mimics the desired ordering. Only the last few entries might need some shuffling around. Doing the sort on-demand should be less wasteful then trying to maintain correct order all the time. * Fix compilation of tests * Fix limitedmap tests * Rename limitedmap to unordered_limitedmap to ensure backports conflict This ensures that future backports that depends on limitedmap's ordering conflict so that we are made aware of needed action. * Fix compilation error on Travis
1 parent 82a47f5 commit 241f76f

12 files changed

+174
-69
lines changed

src/bloom.cpp

+11-2
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,14 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
331331
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
332332
}
333333

334+
335+
// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
336+
// which should be the case for a good hash.
337+
// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
338+
static inline uint32_t FastMod(uint32_t x, size_t n) {
339+
return ((uint64_t)x * (uint64_t)n) >> 32;
340+
}
341+
334342
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
335343
{
336344
if (nEntriesThisGeneration == nEntriesPerGeneration) {
@@ -354,7 +362,8 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
354362
for (int n = 0; n < nHashFuncs; n++) {
355363
uint32_t h = RollingBloomHash(n, nTweak, vKey);
356364
int bit = h & 0x3F;
357-
uint32_t pos = (h >> 6) % data.size();
365+
/* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
366+
uint32_t pos = FastMod(h, data.size());
358367
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
359368
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
360369
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
@@ -372,7 +381,7 @@ bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
372381
for (int n = 0; n < nHashFuncs; n++) {
373382
uint32_t h = RollingBloomHash(n, nTweak, vKey);
374383
int bit = h & 0x3F;
375-
uint32_t pos = (h >> 6) % data.size();
384+
uint32_t pos = FastMod(h, data.size());
376385
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
377386
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
378387
return false;

src/limitedmap.h

+56-20
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,43 @@
66
#define BITCOIN_LIMITEDMAP_H
77

88
#include <assert.h>
9-
#include <map>
9+
#include <algorithm>
10+
#include <unordered_map>
11+
#include <vector>
1012

1113
/** STL-like map container that only keeps the N elements with the highest value. */
12-
template <typename K, typename V>
13-
class limitedmap
14+
// WARNING, this was initially the "limitedmap" class from Bitcoin, but now does not maintain ordering. If any backports
15+
// ever start using this map in a way that requires ordering, do NOT use this as it is but instead reintroduce the original
16+
// limitedmap
17+
template <typename K, typename V, typename Hash = std::hash<K>>
18+
class unordered_limitedmap
1419
{
1520
public:
1621
typedef K key_type;
1722
typedef V mapped_type;
1823
typedef std::pair<const key_type, mapped_type> value_type;
19-
typedef typename std::map<K, V>::const_iterator const_iterator;
20-
typedef typename std::map<K, V>::size_type size_type;
24+
typedef typename std::unordered_map<K, V, Hash>::const_iterator const_iterator;
25+
typedef typename std::unordered_map<K, V, Hash>::size_type size_type;
2126

2227
protected:
23-
std::map<K, V> map;
24-
typedef typename std::map<K, V>::iterator iterator;
25-
std::multimap<V, iterator> rmap;
26-
typedef typename std::multimap<V, iterator>::iterator rmap_iterator;
28+
std::unordered_map<K, V, Hash> map;
29+
typedef typename std::unordered_map<K, V, Hash>::iterator iterator;
30+
std::unordered_multimap<V, iterator> rmap;
31+
typedef typename std::unordered_multimap<V, iterator>::iterator rmap_iterator;
2732
size_type nMaxSize;
33+
size_type nPruneAfterSize;
2834

2935
public:
30-
limitedmap(size_type nMaxSizeIn)
36+
unordered_limitedmap(size_type nMaxSizeIn, size_type nPruneAfterSizeIn = 0)
3137
{
3238
assert(nMaxSizeIn > 0);
3339
nMaxSize = nMaxSizeIn;
40+
if (nPruneAfterSizeIn == 0) {
41+
nPruneAfterSize = nMaxSize;
42+
} else {
43+
nPruneAfterSize = nPruneAfterSizeIn;
44+
}
45+
assert(nPruneAfterSize >= nMaxSize);
3446
}
3547
const_iterator begin() const { return map.begin(); }
3648
const_iterator end() const { return map.end(); }
@@ -42,10 +54,7 @@ class limitedmap
4254
{
4355
std::pair<iterator, bool> ret = map.insert(x);
4456
if (ret.second) {
45-
if (map.size() > nMaxSize) {
46-
map.erase(rmap.begin()->second);
47-
rmap.erase(rmap.begin());
48-
}
57+
prune();
4958
rmap.insert(make_pair(x.second, ret.first));
5059
}
5160
}
@@ -85,16 +94,43 @@ class limitedmap
8594
assert(0);
8695
}
8796
size_type max_size() const { return nMaxSize; }
88-
size_type max_size(size_type s)
97+
size_type max_size(size_type nMaxSizeIn, size_type nPruneAfterSizeIn = 0)
8998
{
90-
assert(s > 0);
91-
while (map.size() > s) {
92-
map.erase(rmap.begin()->second);
93-
rmap.erase(rmap.begin());
99+
assert(nMaxSizeIn > 0);
100+
nMaxSize = nMaxSizeIn;
101+
if (nPruneAfterSizeIn == 0) {
102+
nPruneAfterSize = nMaxSize;
103+
} else {
104+
nPruneAfterSize = nPruneAfterSizeIn;
94105
}
95-
nMaxSize = s;
106+
assert(nPruneAfterSize >= nMaxSize);
107+
prune();
96108
return nMaxSize;
97109
}
110+
void prune()
111+
{
112+
if (map.size() <= nPruneAfterSize) {
113+
return;
114+
}
115+
116+
std::vector<rmap_iterator> sortedIterators;
117+
sortedIterators.reserve(map.size());
118+
for (auto it = rmap.begin(); it != rmap.end(); ++it) {
119+
sortedIterators.emplace_back(it);
120+
}
121+
std::sort(sortedIterators.begin(), sortedIterators.end(), [](const rmap_iterator& it1, const rmap_iterator& it2) {
122+
return it1->first < it2->first;
123+
});
124+
125+
size_type tooMuch = map.size() - nMaxSize;
126+
assert(tooMuch > 0);
127+
sortedIterators.resize(tooMuch);
128+
129+
for (auto& it : sortedIterators) {
130+
map.erase(it->second);
131+
rmap.erase(it);
132+
}
133+
}
98134
};
99135

100136
#endif // BITCOIN_LIMITEDMAP_H

src/net.cpp

+11-14
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ std::map<CNetAddr, LocalServiceInfo> mapLocalHost;
8585
static bool vfLimited[NET_MAX] = {};
8686
std::string strSubVersion;
8787

88-
limitedmap<uint256, int64_t> mapAlreadyAskedFor(MAX_INV_SZ);
88+
unordered_limitedmap<uint256, int64_t, StaticSaltedHasher> mapAlreadyAskedFor(MAX_INV_SZ);
8989

9090
// Signals for message handling
9191
static CNodeSignals g_signals;
@@ -3142,11 +3142,11 @@ CNode::~CNode()
31423142

31433143
void CNode::AskFor(const CInv& inv, int64_t doubleRequestDelay)
31443144
{
3145-
if (mapAskFor.size() > MAPASKFOR_MAX_SZ || setAskFor.size() > SETASKFOR_MAX_SZ) {
3145+
if (vecAskFor.size() > MAPASKFOR_MAX_SZ || setAskFor.size() > SETASKFOR_MAX_SZ) {
31463146
int64_t nNow = GetTime();
31473147
if(nNow - nLastWarningTime > WARNING_INTERVAL) {
3148-
LogPrintf("CNode::AskFor -- WARNING: inventory message dropped: mapAskFor.size = %d, setAskFor.size = %d, MAPASKFOR_MAX_SZ = %d, SETASKFOR_MAX_SZ = %d, nSkipped = %d, peer=%d\n",
3149-
mapAskFor.size(), setAskFor.size(), MAPASKFOR_MAX_SZ, SETASKFOR_MAX_SZ, nNumWarningsSkipped, id);
3148+
LogPrintf("CNode::AskFor -- WARNING: inventory message dropped: vecAskFor.size = %d, setAskFor.size = %d, MAPASKFOR_MAX_SZ = %d, SETASKFOR_MAX_SZ = %d, nSkipped = %d, peer=%d\n",
3149+
vecAskFor.size(), setAskFor.size(), MAPASKFOR_MAX_SZ, SETASKFOR_MAX_SZ, nNumWarningsSkipped, id);
31503150
nLastWarningTime = nNow;
31513151
nNumWarningsSkipped = 0;
31523152
}
@@ -3159,10 +3159,10 @@ void CNode::AskFor(const CInv& inv, int64_t doubleRequestDelay)
31593159
if (!setAskFor.insert(inv.hash).second)
31603160
return;
31613161

3162-
// We're using mapAskFor as a priority queue,
3162+
// We're using vecAskFor as a priority queue,
31633163
// the key is the earliest time the request can be sent
31643164
int64_t nRequestTime;
3165-
limitedmap<uint256, int64_t>::const_iterator it = mapAlreadyAskedFor.find(inv.hash);
3165+
auto it = mapAlreadyAskedFor.find(inv.hash);
31663166
if (it != mapAlreadyAskedFor.end())
31673167
nRequestTime = it->second;
31683168
else
@@ -3183,18 +3183,15 @@ void CNode::AskFor(const CInv& inv, int64_t doubleRequestDelay)
31833183
mapAlreadyAskedFor.update(it, nRequestTime);
31843184
else
31853185
mapAlreadyAskedFor.insert(std::make_pair(inv.hash, nRequestTime));
3186-
mapAskFor.insert(std::make_pair(nRequestTime, inv));
3186+
vecAskFor.emplace_back(nRequestTime, inv);
31873187
}
31883188

31893189
void CNode::RemoveAskFor(const uint256& hash)
31903190
{
3191-
setAskFor.erase(hash);
3192-
for (auto it = mapAskFor.begin(); it != mapAskFor.end();) {
3193-
if (it->second.hash == hash) {
3194-
it = mapAskFor.erase(it);
3195-
} else {
3196-
++it;
3197-
}
3191+
if (setAskFor.erase(hash)) {
3192+
vecAskFor.erase(std::remove_if(vecAskFor.begin(), vecAskFor.end(), [&](const std::pair<int64_t, CInv>& item) {
3193+
return item.second.hash == hash;
3194+
}), vecAskFor.end());
31983195
}
31993196
}
32003197

src/net.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "netaddress.h"
1616
#include "protocol.h"
1717
#include "random.h"
18+
#include "saltedhasher.h"
1819
#include "streams.h"
1920
#include "sync.h"
2021
#include "uint256.h"
@@ -28,6 +29,7 @@
2829
#include <thread>
2930
#include <memory>
3031
#include <condition_variable>
32+
#include <unordered_set>
3133

3234
#ifndef WIN32
3335
#include <arpa/inet.h>
@@ -604,7 +606,7 @@ extern bool fDiscover;
604606
extern bool fListen;
605607
extern bool fRelayTxes;
606608

607-
extern limitedmap<uint256, int64_t> mapAlreadyAskedFor;
609+
extern unordered_limitedmap<uint256, int64_t, StaticSaltedHasher> mapAlreadyAskedFor;
608610

609611
/** Subversion as sent to the P2P network in `version` messages */
610612
extern std::string strSubVersion;
@@ -792,8 +794,8 @@ class CNode
792794
// List of non-tx/non-block inventory items
793795
std::vector<CInv> vInventoryOtherToSend;
794796
CCriticalSection cs_inventory;
795-
std::set<uint256> setAskFor;
796-
std::multimap<int64_t, CInv> mapAskFor;
797+
std::unordered_set<uint256, StaticSaltedHasher> setAskFor;
798+
std::vector<std::pair<int64_t, CInv>> vecAskFor;
797799
int64_t nNextInvSend;
798800
// Used for headers announcements - unfiltered blocks to relay
799801
// Also protected by cs_inventory

src/net_processing.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -3732,9 +3732,11 @@ bool SendMessages(CNode* pto, CConnman& connman, const std::atomic<bool>& interr
37323732
//
37333733
// Message: getdata (non-blocks)
37343734
//
3735-
while (!pto->mapAskFor.empty() && (*pto->mapAskFor.begin()).first <= nNow)
3735+
std::sort(pto->vecAskFor.begin(), pto->vecAskFor.end());
3736+
auto it = pto->vecAskFor.begin();
3737+
while (it != pto->vecAskFor.end() && it->first <= nNow)
37363738
{
3737-
const CInv& inv = (*pto->mapAskFor.begin()).second;
3739+
const CInv& inv = it->second;
37383740
if (!AlreadyHave(inv))
37393741
{
37403742
LogPrint("net", "SendMessages -- GETDATA -- requesting inv = %s peer=%d\n", inv.ToString(), pto->id);
@@ -3750,8 +3752,9 @@ bool SendMessages(CNode* pto, CConnman& connman, const std::atomic<bool>& interr
37503752
LogPrint("net", "SendMessages -- GETDATA -- already have inv = %s peer=%d\n", inv.ToString(), pto->id);
37513753
pto->setAskFor.erase(inv.hash);
37523754
}
3753-
pto->mapAskFor.erase(pto->mapAskFor.begin());
3755+
++it;
37543756
}
3757+
pto->vecAskFor.erase(pto->vecAskFor.begin(), it);
37553758
if (!vGetData.empty()) {
37563759
connman.PushMessage(pto, msgMaker.Make(NetMsgType::GETDATA, vGetData));
37573760
LogPrint("net", "SendMessages -- GETDATA -- pushed size = %lu peer=%d\n", vGetData.size(), pto->id);

0 commit comments

Comments
 (0)