Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add address-based index (attempt 4?) #14053

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ BITCOIN_CORE_H = \
fs.h \
httprpc.h \
httpserver.h \
index/addrindex.h \
index/base.h \
index/blockfilterindex.h \
index/disktxpos.h \
marcinja marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -294,6 +295,7 @@ libbitcoin_server_a_SOURCES = \
flatfile.cpp \
httprpc.cpp \
httpserver.cpp \
index/addrindex.cpp \
index/base.cpp \
index/blockfilterindex.cpp \
index/txindex.cpp \
Expand Down
1 change: 1 addition & 0 deletions src/Makefile.test.include
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ FUZZ_SUITE_LD_COMMON = \

# test_bitcoin binary #
BITCOIN_TESTS =\
test/addrindex_tests.cpp \
test/arith_uint256_tests.cpp \
test/scriptnum10.h \
test/addrman_tests.cpp \
Expand Down
279 changes: 279 additions & 0 deletions src/index/addrindex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
// Copyright (c) 2019 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <dbwrapper.h>
#include <hash.h>
#include <index/addrindex.h>
#include <index/disktxpos.h>
#include <shutdown.h>
#include <primitives/transaction.h>
#include <random.h>
#include <script/standard.h>
#include <txdb.h>
#include <validation.h>
#include <vector>
#include <uint256.h>

// Definition of the global address index instance. It is default-constructed
// here, i.e. it holds no index until something assigns one.
std::unique_ptr<AddrIndex> g_addr_index;

/*
* The address index stores two main types of objects that allow for
* script-based/address-based look-ups of all created outputs and all spent
* outputs in the Bitcoin blockchain. These are differentiated by their key_type
* as either DBKeyType::SPENT or DBKeyType::CREATED. The address index also
* stores one unique global value under the DBKeyType::SEED key that seeds the
* MurmurHash3 hasher used to create AddrIds.
*
* The DB keys are structured as follows: <addr_id, key_type, outpoint>
*
* addr_id is the hash of the script_pub_key computed using MurmurHash3, a short
* non-cryptographic hash function. It also functions as the search key, since
* LevelDB keys are iterated through in lexicographical order. Collisions are
* resolved by storing the full script_pub_key in the DB value. This can be
* checked against the script used to make a look-up in the index.
*
* key_type is SPENT when the outpoint stored in the key is spent, i.e. it is
* used as a prevout in a transaction input. It is CREATED when the outpoint is
* created as a new COutPoint in that transaction.
*
* outpoints are stored in the key as opposed to in the DB value to preserve
* uniqueness and to support multiple values for a single addr_id and key_type
* combination. LevelDB only allows one value stored for each key.
*
*
* The DB values are simply: <CDiskTxPos, CScript>
*
* The tx_pos (CDiskTxPos) value is the transaction in which:
* - the outpoint in the key was spent (for SPENT keys)
* - OR where the outpoint was created (for CREATED keys)
*
* The CScript (the script_pub_key) is used for collision resolution.
*/
// AddrId is the short hash of a script_pub_key, as returned by
// AddrIndex::GetAddrId. It is the leading component of every DBKey.
using AddrId = unsigned int;
// DBKeyType is used by the address index to distinguish between the
// different kinds of values stored.
// NOTE: the enumerators are serialized as their uint8_t value, so their order
// must not change once an index has been written.
enum class DBKeyType : uint8_t {
SEED, // Seed used for MurmurHash3 inside GetAddrId
SPENT, // Used for values in the index indicating a spend
CREATED, // Used for values in the index indicating the creation of an output
};


namespace {

/**
 * Key under which an address index entry is stored.
 *
 * Serialized as <addr_id, key_type (as uint8_t), outpoint>, in that order.
 * Only SPENT and CREATED are valid key types for a DBKey; serializing or
 * deserializing any other type throws std::ios_base::failure.
 */
struct DBKey {
    AddrId m_addr_id{0};
    // SEED is never a valid type for a DBKey, so a default-constructed key is
    // rejected by the serializer until it has been filled in.
    DBKeyType m_key_type{DBKeyType::SEED};
    COutPoint m_outpoint;

    DBKey() = default;
    explicit DBKey(DBKeyType key_type, AddrId addr_id, COutPoint outpoint) : m_addr_id(addr_id), m_key_type(key_type), m_outpoint(outpoint) {}

    SERIALIZE_METHODS(DBKey, obj) {
        // The enum travels as a plain byte; it is converted on the way out and
        // converted back (and validated) on the way in.
        uint8_t key_type{0};
        SER_WRITE(obj, key_type = static_cast<uint8_t>(obj.m_key_type));

        READWRITE(obj.m_addr_id, key_type, obj.m_outpoint);

        SER_READ(obj, obj.m_key_type = static_cast<DBKeyType>(key_type));
        // Check if the key type is a valid key. SEED keys are excluded because they
        // are never created with this type.
        if ((obj.m_key_type != DBKeyType::SPENT) && (obj.m_key_type != DBKeyType::CREATED)) {
            throw std::ios_base::failure("Invalid key type for address index DB key");
        }
    }
};

} // namespace

// The address index stores information needed to get relevant transactions,
// and a copy of the CScript to double check against in case of hash collisions.
// first:  on-disk position of the transaction the entry refers to.
// second: the full script_pub_key the entry was indexed under.
using DBValue = std::pair<CDiskTxPos, CScript>;

/** Database backing the address index (stored under indexes/addr_index/). */
class AddrIndex::DB : public BaseIndex::DB
{
public:
    explicit DB(size_t n_cache_size, bool f_memory = false, bool f_wipe = false);

    /** SetupHashSeed returns the hashing seed stored in the index, creating and storing one if none exists yet. */
    unsigned int SetupHashSeed();

    /** WriteToIndex stores every (key, value) pair of `entries` in the index. */
    bool WriteToIndex(const std::vector<std::pair<DBKey, DBValue>> &entries);

    /** ReadAddrIndex returns entries stored under `addr_id` whose stored script equals `script`, bounded by `max_count` and `skip`. */
    std::vector<std::pair<DBKey, DBValue>> ReadAddrIndex(const int max_count, const int skip, const unsigned int addr_id, const CScript& script);
};

// Opens the database located at <datadir>/indexes/addr_index, forwarding the
// cache size and the in-memory / wipe flags to the base index database.
AddrIndex::DB::DB(size_t n_cache_size, bool f_memory, bool f_wipe)
    : BaseIndex::DB(GetDataDir() / "indexes" / "addr_index", n_cache_size, f_memory, f_wipe)
{
}

// Exposes the address index database through the base-class interface.
BaseIndex::DB& AddrIndex::GetDB() const
{
    return *m_db;
}

std::vector<std::pair<DBKey, DBValue>> AddrIndex::DB::ReadAddrIndex(const int max_count, const int skip, const unsigned int addr_id, const CScript& script)
{
std::vector<std::pair<DBKey, DBValue>> result;
AddrId search_key = addr_id;

std::unique_ptr<CDBIterator> iter(NewIterator());
iter->Seek(search_key);
int i = 0;
while (iter->Valid() && i < max_count) {
DBKey key;
DBValue value;
if (!iter->GetKey(key) || key.m_addr_id != addr_id) break;
if (!iter->GetValue(value)) {
LogPrintf("%s: Failed to read value stored under key with addr_id: %d\n", __func__, addr_id);
break;
}

// Check that the stored script matches the one we're searching for, in case of hash collisions.
if (value.second == script && i >= skip) {
result.emplace_back(std::make_pair(key, value));
}
iter->Next();
i++;
}

return result;
}

// Loads (or creates) the hash seed first, then runs the base-class
// initialisation and returns its result.
bool AddrIndex::Init()
{
    const unsigned int stored_seed = m_db->SetupHashSeed();
    m_hash_seed = stored_seed;

    return BaseIndex::Init();
}

// Creates the backing database. The hash seed is given a defined value (0)
// here so it is never read indeterminate; the real seed is loaded by Init().
AddrIndex::AddrIndex(size_t n_cache_size, bool f_memory, bool f_wipe)
    : m_db(MakeUnique<AddrIndex::DB>(n_cache_size, f_memory, f_wipe)),
      m_hash_seed(0) {}

unsigned int AddrIndex::DB::SetupHashSeed() {
static const uint8_t seed_key = static_cast<uint8_t>(DBKeyType::SEED);
unsigned int seed;

std::unique_ptr<CDBIterator> iter(NewIterator());
uint8_t key;

// If key is in the index already, read it and return.
iter->Seek(seed_key);
if (iter->Valid() && iter->GetKey(key) && key == seed_key) {
if (!iter->GetValue(seed)) {
return error("%s: Cannot read current %s seed key; index may be corrupted", __func__);
}
return seed;
}

// Generate a random key and write it to the index.
seed = GetRandInt(std::numeric_limits<int>::max());
Write(seed_key, seed);
return seed;
}

// Defined out of line, after AddrIndex::DB is a complete type, so that the
// unique_ptr<DB> member can be destroyed.
AddrIndex::~AddrIndex() = default;

// Hashes the bytes of `script` with MurmurHash3, seeded with m_hash_seed, and
// returns the result.
unsigned int AddrIndex::GetAddrId(const CScript& script)
{
    // The hasher is handed a std::vector<unsigned char>, so the script bytes
    // are copied into one first.
    const std::vector<unsigned char> raw_script(script.begin(), script.end());
    return MurmurHash3(m_hash_seed, raw_script);
}

// WriteBlock builds the index entries for one block and writes them in a
// single WriteToIndex call:
//  - one CREATED entry per output of every transaction in the block
//  - one SPENT entry per input of every transaction other than the first
//    (for every block except the one at height 0)
// Returns false if the undo data of a non-genesis block cannot be read,
// otherwise the result of WriteToIndex.
bool AddrIndex::WriteBlock(const CBlock& block, const CBlockIndex* pindex)
{
CBlockUndo block_undo;
// pos tracks the transaction currently being processed. Its offset starts at
// the size of the compact-size transaction count and is advanced by the
// serialized size of each transaction at the end of the loop body.
CDiskTxPos pos(pindex->GetBlockPos(), GetSizeOfCompactSize(block.vtx.size()));
std::vector<std::pair<DBKey, DBValue>> entries;

// No undo data is read for the block at height 0.
const bool genesis_block = (pindex->nHeight == 0);
if (!genesis_block && !UndoReadFromDisk(block_undo, pindex)) {
return false;
}

for (size_t i = 0; i < block.vtx.size(); ++i) {
const CTransaction& tx = *(block.vtx[i]);
const uint256 tx_hash = tx.GetHash();
// CREATED entries: keyed by the new outpoint (tx_hash, j), valued with the
// position of this transaction and the script of the output.
for (size_t j = 0; j < tx.vout.size(); ++j) {
CScript script_pub_key = tx.vout[j].scriptPubKey;
DBKey key(DBKeyType::CREATED, GetAddrId(script_pub_key), COutPoint(tx_hash, j));
entries.emplace_back(key, std::make_pair(pos, script_pub_key));
}

// Skip coinbase inputs.
if (!genesis_block && i > 0) {
// vtxundo is indexed with i-1, i.e. it is expected to hold no entry for the
// first (coinbase) transaction of the block.
const CTxUndo& tx_undo = block_undo.vtxundo[i-1];
for (size_t k = 0; k < tx.vin.size(); ++k) {
// The script being spent comes from the undo data of input k.
CScript spent_outputs_scriptpubkey = tx_undo.vprevout[k].out.scriptPubKey;
// SPENT entries: keyed by the outpoint being spent (the prevout of the input),
// valued with the position of this (spending) transaction.
DBKey key(DBKeyType::SPENT, GetAddrId(spent_outputs_scriptpubkey), tx.vin[k].prevout);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this isn't the information that we want to save in the address index. For a given scriptPubKey, a user wants to know:

  • which txouts (txid, output index) spent to that scriptPubKey (the CREATED DBKeyType above)
  • which txins (txid, input index) consume UTXOs for that scriptPubKey (the SPENT DBKeyType here)

So here, I think you want to save the txid and input index spending the coin. You're actually saving the txid and output index that creates the coin, because you're using the prevout.

(I'm not entirely sure about this. Perhaps you are trying to return the outpoint that created the coin in the spent outputs array, but that's not clear to me from the RPC documentation).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is correct because he is using the prevout in the key. When looking up by this script with this outpoint, one would want to find this transaction because it spends this outpoint.

AFAICT the value just contains the position of the relevant transaction on disk and the scriptPubKey (to detect collisions). The value does not contain any indexes into inputs or outputs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment with your key/value format somewhere and the justification?

entries.emplace_back(key, std::make_pair(pos, spent_outputs_scriptpubkey));
}
}
// Advance to the start of the next transaction.
pos.nTxOffset += ::GetSerializeSize(tx, CLIENT_VERSION);
}

return m_db->WriteToIndex(entries);
}

bool AddrIndex::DB::WriteToIndex(const std::vector<std::pair<DBKey, DBValue>> &entries)
{
CDBBatch batch(*this);
for (const auto& entry : entries) {
batch.Write(entry.first, entry.second);
}
return WriteBatch(batch);
}

// FindTxsByScript fills the spends_result vector with outpoints corresponding
// to the output spent with the given script, and the transaction it was spent
// in. creations_result is filled with outpoints for outputs created with this
// script as their script pubkey, and the transactions they were created in.
// max_count determines the maximum number of results returned. The skip
// parameter sets the number of initial values skipped.
bool AddrIndex::FindTxsByScript(const int max_count,
const int skip,
const CScript& script,
std::vector<std::pair<COutPoint, std::pair<CTransactionRef, uint256>>> &spends_result,
std::vector<std::pair<COutPoint, std::pair<CTransactionRef, uint256>>> &creations_result)
{
auto db_entries = m_db->ReadAddrIndex(max_count, skip, GetAddrId(script), script);
if (db_entries.size() == 0) return false;

for (const auto& entry : db_entries) {
DBKey key = entry.first;
CDiskTxPos pos = entry.second.first;

CAutoFile file(OpenBlockFile(pos, true), SER_DISK, CLIENT_VERSION);
if (file.IsNull()) {
return error("%s: OpenBlockFile failed", __func__);
}
CBlockHeader header;
CTransactionRef tx;
try {
file >> header;
if (fseek(file.Get(), pos.nTxOffset, SEEK_CUR)) {
return error("%s: fseek(...) failed", __func__);
}
file >> tx;
} catch (const std::exception& e) {
return error("%s: Deserialize or I/O error - %s", __func__, e.what());
}
std::pair<CTransactionRef, uint256> result = std::make_pair(tx, header.GetHash());

// Place entry into correct vector depending on its type.
switch (key.m_key_type) {
case DBKeyType::SPENT:
spends_result.emplace_back(std::make_pair(key.m_outpoint, result));
marcinja marked this conversation as resolved.
Show resolved Hide resolved
break;
case DBKeyType::CREATED:
creations_result.emplace_back(std::make_pair(key.m_outpoint, result));
break;
default:
LogPrintf("AddrIndex::DB returned value with unexpected key type.\n");
return false;
}
}

return true;
}
64 changes: 64 additions & 0 deletions src/index/addrindex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) 2019 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef BITCOIN_INDEX_ADDRINDEX_H
#define BITCOIN_INDEX_ADDRINDEX_H

#include <chain.h>
#include <index/base.h>
#include <vector>
#include <txdb.h>
#include <uint256.h>
#include <primitives/transaction.h>
#include <script/standard.h>
#include <script/script.h>
#include <undo.h>

/**
 * AddrIndex maps scripts to the transactions that create or spend outputs
 * paying to them. Entries are kept in a LevelDB database and record where on
 * disk each such transaction is located.
 */
class AddrIndex final : public BaseIndex
{
protected:
    class DB;

    /// Loads the hash seed from the database, then runs BaseIndex::Init().
    bool Init() override;

    /// Adds the index entries for all transactions of `block`.
    bool WriteBlock(const CBlock& block, const CBlockIndex* pindex) override;

    BaseIndex::DB& GetDB() const override;

    const char* GetName() const override { return "addr_index"; }

private:
    const std::unique_ptr<DB> m_db;

    // Seed handed to MurmurHash3 by GetAddrId. It is kept in the index
    // database and loaded from there in Init(), so that the same script
    // always hashes to the same id.
    unsigned int m_hash_seed;

    // Returns the seeded hash of `script` that is used as its id in the index.
    unsigned int GetAddrId(const CScript& script);

public:
    /// Constructs the index, which becomes available to be queried.
    explicit AddrIndex(size_t n_cache_size, bool f_memory = false, bool f_wipe = false);

    // Destructor is declared because this class contains a unique_ptr to an incomplete type.
    ~AddrIndex() override;

    /// Looks up the entries indexed for the script `dest`. Outpoints spent are
    /// appended to spends_result and outpoints created to creations_result,
    /// each paired with the relevant transaction and a block hash.
    bool FindTxsByScript(const int max_count,
                         const int skip,
                         const CScript& dest,
                         std::vector<std::pair<COutPoint, std::pair<CTransactionRef, uint256>>> &spends_result,
                         std::vector<std::pair<COutPoint, std::pair<CTransactionRef, uint256>>> &creations_result);
};

/// The global address index, used in FindTxsByScript. May be null.
extern std::unique_ptr<AddrIndex> g_addr_index;

#endif // BITCOIN_INDEX_ADDRINDEX_H
Loading