Skip to content

Commit

Permalink
Integrate ASN bucketing in Addrman and add tests
Browse files Browse the repository at this point in the history
Instead of using /16 netgroups to bucket nodes in Addrman for connection
diversification, ASN, which better represents an actor in terms
of network-layer infrastructure, is used.
For testing, asmap.raw is used. It represents a minimal
asmap needed for testing purposes.

Adaptation of btc@ec45646de9e62b3d42c85716bfeb06d8f2b507dc with the latest version of the asmap.raw file.
  • Loading branch information
naumenkogs authored and furszy committed Jul 28, 2021
1 parent 718f1df commit 4c3ae7d
Show file tree
Hide file tree
Showing 11 changed files with 495 additions and 93 deletions.
12 changes: 11 additions & 1 deletion src/Makefile.test.include
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ JSON_TEST_FILES = \
test/data/merkle_commitments_sapling.json \
test/data/sapling_key_components.json

RAW_TEST_FILES =
RAW_TEST_FILES = \
test/data/asmap.raw

GENERATED_TEST_FILES = $(JSON_TEST_FILES:.json=.json.h) $(RAW_TEST_FILES:.raw=.raw.h)

Expand Down Expand Up @@ -423,3 +424,12 @@ check-local: check-sapling check-standard
echo "};};"; \
} > "$@.new" && mv -f "$@.new" "$@"
@echo "Generated $@"

%.raw.h: %.raw
@$(MKDIR_P) $(@D)
@{ \
echo "static unsigned const char $(*F)_raw[] = {" && \
$(HEXDUMP) -v -e '8/1 "0x%02x, "' -e '"\n"' $< | $(SED) -e 's/0x ,//g' && \
echo "};"; \
} > "$@.new" && mv -f "$@.new" "$@"
@echo "Generated $@"
63 changes: 43 additions & 20 deletions src/addrman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@
#include "streams.h"


int CAddrInfo::GetTriedBucket(const uint256& nKey) const
int CAddrInfo::GetTriedBucket(const uint256& nKey, const std::vector<bool> &asmap) const
{
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetKey()).GetHash().GetCheapHash();
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup() << (hash1 % ADDRMAN_TRIED_BUCKETS_PER_GROUP)).GetHash().GetCheapHash();
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup(asmap) << (hash1 % ADDRMAN_TRIED_BUCKETS_PER_GROUP)).GetHash().GetCheapHash();
return hash2 % ADDRMAN_TRIED_BUCKET_COUNT;
}

int CAddrInfo::GetNewBucket(const uint256& nKey, const CNetAddr& src) const
int CAddrInfo::GetNewBucket(const uint256& nKey, const CNetAddr& src, const std::vector<bool> &asmap) const
{
std::vector<unsigned char> vchSourceGroupKey = src.GetGroup();
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup() << vchSourceGroupKey).GetHash().GetCheapHash();
std::vector<unsigned char> vchSourceGroupKey = src.GetGroup(asmap);
uint64_t hash1 = (CHashWriter(SER_GETHASH, 0) << nKey << GetGroup(asmap) << vchSourceGroupKey).GetHash().GetCheapHash();
uint64_t hash2 = (CHashWriter(SER_GETHASH, 0) << nKey << vchSourceGroupKey << (hash1 % ADDRMAN_NEW_BUCKETS_PER_SOURCE_GROUP)).GetHash().GetCheapHash();
return hash2 % ADDRMAN_NEW_BUCKET_COUNT;
}
Expand Down Expand Up @@ -156,7 +156,7 @@ void CAddrMan::MakeTried(CAddrInfo& info, int nId)
assert(info.nRefCount == 0);

// which tried bucket to move the entry to
int nKBucket = info.GetTriedBucket(nKey);
int nKBucket = info.GetTriedBucket(nKey, m_asmap);
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);

// first make space to add it (the existing tried entry there is moved to new, deleting whatever is there).
Expand All @@ -172,7 +172,7 @@ void CAddrMan::MakeTried(CAddrInfo& info, int nId)
nTried--;

// find which new bucket it belongs to
int nUBucket = infoOld.GetNewBucket(nKey);
int nUBucket = infoOld.GetNewBucket(nKey, m_asmap);
int nUBucketPos = infoOld.GetBucketPosition(nKey, true, nUBucket);
ClearNew(nUBucket, nUBucketPos);
assert(vvNew[nUBucket][nUBucketPos] == -1);
Expand Down Expand Up @@ -236,7 +236,7 @@ void CAddrMan::Good_(const CService& addr, bool test_before_evict, int64_t nTime
return;

// which tried bucket to move the entry to
int tried_bucket = info.GetTriedBucket(nKey);
int tried_bucket = info.GetTriedBucket(nKey, m_asmap);
int tried_bucket_pos = info.GetBucketPosition(nKey, false, tried_bucket);

// Will moving this address into tried evict another entry?
Expand Down Expand Up @@ -304,7 +304,7 @@ bool CAddrMan::Add_(const CAddress& addr, const CNetAddr& source, int64_t nTimeP
fNew = true;
}

int nUBucket = pinfo->GetNewBucket(nKey, source);
int nUBucket = pinfo->GetNewBucket(nKey, source, m_asmap);
int nUBucketPos = pinfo->GetBucketPosition(nKey, true, nUBucket);
if (vvNew[nUBucket][nUBucketPos] != nId) {
bool fInsert = vvNew[nUBucket][nUBucketPos] == -1;
Expand Down Expand Up @@ -438,15 +438,15 @@ int CAddrMan::Check_()

for (int n = 0; n < ADDRMAN_TRIED_BUCKET_COUNT; n++) {
for (int i = 0; i < ADDRMAN_BUCKET_SIZE; i++) {
if (vvTried[n][i] != -1) {
if (!setTried.count(vvTried[n][i]))
return -11;
if (mapInfo[vvTried[n][i]].GetTriedBucket(nKey) != n)
return -17;
if (mapInfo[vvTried[n][i]].GetBucketPosition(nKey, false, n) != i)
return -18;
setTried.erase(vvTried[n][i]);
}
if (vvTried[n][i] != -1) {
if (!setTried.count(vvTried[n][i]))
return -11;
if (mapInfo[vvTried[n][i]].GetTriedBucket(nKey, m_asmap) != n)
return -17;
if (mapInfo[vvTried[n][i]].GetBucketPosition(nKey, false, n) != i)
return -18;
setTried.erase(vvTried[n][i]);
}
}
}

Expand Down Expand Up @@ -547,7 +547,7 @@ void CAddrMan::ResolveCollisions_()
CAddrInfo& info_new = mapInfo[id_new];

// Which tried bucket to move the entry to.
int tried_bucket = info_new.GetTriedBucket(nKey);
int tried_bucket = info_new.GetTriedBucket(nKey, m_asmap);
int tried_bucket_pos = info_new.GetBucketPosition(nKey, false, tried_bucket);
if (!info_new.IsValid()) { // id_new may no longer map to a valid address
erase_collision = true;
Expand Down Expand Up @@ -611,10 +611,33 @@ CAddrInfo CAddrMan::SelectTriedCollision_()
CAddrInfo& newInfo = mapInfo[id_new];

// which tried bucket to move the entry to
int tried_bucket = newInfo.GetTriedBucket(nKey);
int tried_bucket = newInfo.GetTriedBucket(nKey, m_asmap);
int tried_bucket_pos = newInfo.GetBucketPosition(nKey, false, tried_bucket);

int id_old = vvTried[tried_bucket][tried_bucket_pos];

return mapInfo[id_old];
}

std::vector<bool> CAddrMan::DecodeAsmap(fs::path path)
{
std::vector<bool> bits;
FILE *filestr = fsbridge::fopen(path, "rb");
CAutoFile file(filestr, SER_DISK, CLIENT_VERSION);
if (file.IsNull()) {
LogPrintf("Failed to open asmap file from disk.\n");
return bits;
}
fseek(filestr, 0, SEEK_END);
int length = ftell(filestr);
LogPrintf("Opened asmap file %s (%d bytes) from disk.\n", path, length);
fseek(filestr, 0, SEEK_SET);
char cur_byte;
for (int i = 0; i < length; ++i) {
file >> cur_byte;
for (int bit = 0; bit < 8; ++bit) {
bits.push_back((cur_byte >> bit) & 1);
}
}
return bits;
}
101 changes: 76 additions & 25 deletions src/addrman.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@
#include "sync.h"
#include "timedata.h"
#include "util/system.h"
#include "clientversion.h"

#include <map>
#include <set>
#include <stdint.h>
#include <vector>
#include <iostream>
#include <streams.h>
#include <fs.h>
#include <hash.h>


/**
* Extended statistics about a CAddress
Expand Down Expand Up @@ -69,15 +75,15 @@ class CAddrInfo : public CAddress
}

//! Calculate in which "tried" bucket this entry belongs
int GetTriedBucket(const uint256& nKey) const;
int GetTriedBucket(const uint256& nKey, const std::vector<bool>& asmap) const;

//! Calculate in which "new" bucket this entry belongs, given a certain source
int GetNewBucket(const uint256& nKey, const CNetAddr& src) const;
int GetNewBucket(const uint256& nKey, const CNetAddr& src, const std::vector<bool>& asmap) const;

//! Calculate in which "new" bucket this entry belongs, using its default source
int GetNewBucket(const uint256& nKey) const
int GetNewBucket(const uint256& nKey, const std::vector<bool>& asmap) const
{
return GetNewBucket(nKey, source);
return GetNewBucket(nKey, source, asmap);
}

//! Calculate in which position of a bucket to store this entry.
Expand Down Expand Up @@ -171,6 +177,7 @@ static const int64_t ADDRMAN_TEST_WINDOW = 40*60; // 40 minutes
*/
class CAddrMan
{
friend class CAddrManTest;
protected:
//! critical section to protect the inner data structures
mutable RecursiveMutex cs;
Expand Down Expand Up @@ -265,9 +272,29 @@ class CAddrMan
void SetServices_(const CService& addr, ServiceFlags nServices) EXCLUSIVE_LOCKS_REQUIRED(cs);

public:
// Compressed IP->ASN mapping, loaded from a file when a node starts.
// Should be always empty if no file was provided.
// This mapping is then used for bucketing nodes in Addrman.
//
// If asmap is provided, nodes will be bucketed by
// AS they belong to, in order to make impossible for a node
// to connect to several nodes hosted in a single AS.
// This is done in response to Erebus attack, but also to generally
// diversify the connections every node creates,
// especially useful when a large fraction of nodes
// operate under a couple of cloud providers.
//
// If a new asmap was provided, the existing records
// would be re-bucketed accordingly.
std::vector<bool> m_asmap;

// Read asmap from provided binary file
static std::vector<bool> DecodeAsmap(fs::path path);


/**
* serialized format:
* * version byte (currently 1)
* * version byte (1 for pre-asmap files, 2 for files including asmap version)
* * 0x20 + nKey (serialized as if it were a vector, for backward compatibility)
* * nNew
* * nTried
Expand Down Expand Up @@ -299,7 +326,7 @@ class CAddrMan
{
LOCK(cs);

unsigned char nVersion = 1;
unsigned char nVersion = 2;
s << nVersion;
s << ((unsigned char)32);
s << nKey;
Expand Down Expand Up @@ -342,6 +369,13 @@ class CAddrMan
}
}
}
// Store asmap version after bucket entries so that it
// can be ignored by older clients for backward compatibility.
uint256 asmap_version;
if (m_asmap.size() != 0) {
asmap_version = SerializeHash(m_asmap);
}
s << asmap_version;
}

template <typename Stream>
Expand All @@ -350,7 +384,6 @@ class CAddrMan
LOCK(cs);

Clear();

unsigned char nVersion;
s >> nVersion;
unsigned char nKeySize;
Expand Down Expand Up @@ -380,16 +413,6 @@ class CAddrMan
mapAddr[info] = n;
info.nRandomPos = vRandom.size();
vRandom.push_back(n);
if (nVersion != 1 || nUBuckets != ADDRMAN_NEW_BUCKET_COUNT) {
// In case the new table data cannot be used (nVersion unknown, or bucket count wrong),
// immediately try to give them a reference based on their primary source address.
int nUBucket = info.GetNewBucket(nKey);
int nUBucketPos = info.GetBucketPosition(nKey, true, nUBucket);
if (vvNew[nUBucket][nUBucketPos] == -1) {
vvNew[nUBucket][nUBucketPos] = n;
info.nRefCount++;
}
}
}
nIdCount = nNew;

Expand All @@ -398,7 +421,7 @@ class CAddrMan
for (int n = 0; n < nTried; n++) {
CAddrInfo info;
s >> info;
int nKBucket = info.GetTriedBucket(nKey);
int nKBucket = info.GetTriedBucket(nKey, m_asmap);
int nKBucketPos = info.GetBucketPosition(nKey, false, nKBucket);
if (vvTried[nKBucket][nKBucketPos] == -1) {
info.nRandomPos = vRandom.size();
Expand All @@ -414,20 +437,48 @@ class CAddrMan
}
nTried -= nLost;

// Deserialize positions in the new table (if possible).
// Store positions in the new table buckets to apply later (if possible).
std::map<int, int> entryToBucket; // Represents which entry belonged to which bucket when serializing

for (int bucket = 0; bucket < nUBuckets; bucket++) {
int nSize = 0;
s >> nSize;
for (int n = 0; n < nSize; n++) {
int nIndex = 0;
s >> nIndex;
if (nIndex >= 0 && nIndex < nNew) {
CAddrInfo& info = mapInfo[nIndex];
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
if (nVersion == 1 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS) {
info.nRefCount++;
vvNew[bucket][nUBucketPos] = nIndex;
}
entryToBucket[nIndex] = bucket;
}
}
}

uint256 supplied_asmap_version;
if (m_asmap.size() != 0) {
supplied_asmap_version = SerializeHash(m_asmap);
}
uint256 serialized_asmap_version;
if (nVersion > 1) {
s >> serialized_asmap_version;
}

for (int n = 0; n < nNew; n++) {
CAddrInfo &info = mapInfo[n];
int bucket = entryToBucket[n];
int nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
if (nVersion == 2 && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 &&
info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) {
// Bucketing has not changed, using existing bucket positions for the new table
vvNew[bucket][nUBucketPos] = n;
info.nRefCount++;
} else {
// In case the new table data cannot be used (nVersion unknown, bucket count wrong or new asmap),
// try to give them a reference based on their primary source address.
LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
bucket = info.GetNewBucket(nKey, m_asmap);
nUBucketPos = info.GetBucketPosition(nKey, true, bucket);
if (vvNew[bucket][nUBucketPos] == -1) {
vvNew[bucket][nUBucketPos] = n;
info.nRefCount++;
}
}
}
Expand Down
Loading

0 comments on commit 4c3ae7d

Please sign in to comment.