Skip to content

Commit

Permalink
7. feat(db): Add a transparent address transaction index (ZcashFounda…
Browse files Browse the repository at this point in the history
…tion#4038)

* feat(db): add transaction location index

* Apply suggestions from code review

Co-authored-by: teor <teor@riseup.net>

* add address_tx_ids(); also index spends from addresses

Co-authored-by: teor <teor@riseup.net>
  • Loading branch information
conradoplg and teor2345 authored Apr 13, 2022
1 parent 7b7d22a commit 53a4299
Show file tree
Hide file tree
Showing 19 changed files with 382 additions and 12 deletions.
2 changes: 1 addition & 1 deletion zebra-state/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;

/// The database format version, incremented each time the database format changes.
pub const DATABASE_FORMAT_VERSION: u32 = 21;
pub const DATABASE_FORMAT_VERSION: u32 = 22;

/// The maximum number of blocks to check for NU5 transactions,
/// before we assume we are on a pre-NU5 legacy chain.
Expand Down
4 changes: 4 additions & 0 deletions zebra-state/src/service/finalized_state/disk_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,10 @@ impl DiskDb {
"utxo_loc_by_transparent_addr_loc",
db_options.clone(),
),
rocksdb::ColumnFamilyDescriptor::new(
"tx_loc_by_transparent_addr_loc",
db_options.clone(),
),
// Sprout
rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ pub const TRANSACTION_LOCATION_DISK_BYTES: usize = HEIGHT_DISK_BYTES + TX_INDEX_
any(test, feature = "proptest-impl"),
derive(Arbitrary, Serialize, Deserialize)
)]
pub struct TransactionIndex(u16);
pub struct TransactionIndex(pub(super) u16);

impl TransactionIndex {
/// Creates a transaction index from the inner type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ use crate::service::finalized_state::{
disk_format::{
block::MAX_ON_DISK_HEIGHT,
transparent::{
AddressBalanceLocation, AddressLocation, AddressUnspentOutput, OutputLocation,
AddressBalanceLocation, AddressLocation, AddressTransaction, AddressUnspentOutput,
OutputLocation,
},
IntoDisk, TransactionLocation,
},
Expand Down Expand Up @@ -191,6 +192,20 @@ fn roundtrip_address_unspent_output() {
);
}

/// Checks that arbitrary [`AddressTransaction`] values pass the shared
/// value property checks, after clamping both heights to the on-disk range.
#[test]
fn roundtrip_address_transaction() {
    zebra_test::init();

    proptest!(
        |(mut address_transaction in any::<AddressTransaction>())| {
            // Heights above `MAX_ON_DISK_HEIGHT` are clamped before the checks run.
            let address_height = address_transaction
                .address_location()
                .height()
                .clamp(Height(0), MAX_ON_DISK_HEIGHT);
            *address_transaction.address_location_mut().height_mut() = address_height;

            let transaction_height = address_transaction
                .transaction_location()
                .height
                .clamp(Height(0), MAX_ON_DISK_HEIGHT);
            address_transaction.transaction_location_mut().height = transaction_height;

            assert_value_properties(address_transaction)
        }
    );
}

#[test]
fn roundtrip_amount() {
zebra_test::init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ expression: cf_names
"tip_chain_value_pool",
"tx_by_hash",
"tx_by_loc",
"tx_loc_by_transparent_addr_loc",
"utxo_by_outpoint",
"utxo_loc_by_transparent_addr_loc",
]
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ expression: empty_column_families
"sprout_anchors: no entries",
"sprout_nullifiers: no entries",
"tip_chain_value_pool: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ expression: empty_column_families
"tip_chain_value_pool: no entries",
"tx_by_hash: no entries",
"tx_by_loc: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ expression: empty_column_families
"sprout_anchors: no entries",
"sprout_nullifiers: no entries",
"tip_chain_value_pool: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
KV(
k: "00000100000000010000020000",
v: "",
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
KV(
k: "00000100000000010000020000",
v: "",
),
]
121 changes: 121 additions & 0 deletions zebra-state/src/service/finalized_state/disk_format/transparent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,96 @@ impl AddressUnspentOutput {
}
}

/// A single transaction sent to a [`transparent::Address`].
///
/// We store both the address location key and transaction location value
/// in the RocksDB column family key. This improves insert and delete performance.
///
/// This requires 8 extra bytes for each transaction location,
/// because we repeat the key for each value.
/// But RocksDB compression reduces the duplicate data size on disk.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[cfg_attr(
any(test, feature = "proptest-impl"),
derive(Arbitrary, Serialize, Deserialize)
)]
pub struct AddressTransaction {
/// The location of the first [`transparent::Output`] sent to the address.
///
/// This location identifies the address, and can be used to look it up.
address_location: AddressLocation,

/// The location of the transaction sent to the address.
transaction_location: TransactionLocation,
}

impl AddressTransaction {
/// Create a new [`AddressTransaction`] from an address location,
/// and a transaction location.
pub fn new(
address_location: AddressLocation,
transaction_location: TransactionLocation,
) -> AddressTransaction {
AddressTransaction {
address_location,
transaction_location,
}
}

/// Create an [`AddressTransaction`] which starts iteration for the supplied address.
/// Used to look up the first transaction with [`ReadDisk::zs_next_key_value_from`].
///
/// The transaction location is before all unspent output locations in the index.
/// It is always invalid, due to the genesis consensus rules. But this is not an issue
/// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value.
pub fn address_iterator_start(address_location: AddressLocation) -> AddressTransaction {
// Iterating from the lowest possible transaction location gets us the first transaction.
let zero_transaction_location = TransactionLocation::from_usize(Height(0), 0);

AddressTransaction {
address_location,
transaction_location: zero_transaction_location,
}
}

/// Update the transaction location to the next possible transaction for the supplied address.
/// Used to look up the next output with [`ReadDisk::zs_next_key_value_from`].
///
/// The updated transaction location may be invalid, which is not an issue
/// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value.
pub fn address_iterator_next(&mut self) {
// Iterating from the next possible output location gets us the next output,
// even if it is in a later block or transaction.
//
// Consensus: the block size limit is 2MB, which is much lower than the index range.
self.transaction_location.index.0 += 1;
}

/// The location of the first [`transparent::Output`] sent to the address of this output.
///
/// This can be used to look up the address.
pub fn address_location(&self) -> AddressLocation {
self.address_location
}

/// The location of this transaction.
pub fn transaction_location(&self) -> TransactionLocation {
self.transaction_location
}

/// Allows tests to modify the address location.
#[cfg(any(test, feature = "proptest-impl"))]
#[allow(dead_code)]
pub fn address_location_mut(&mut self) -> &mut AddressLocation {
&mut self.address_location
}

/// Allows tests to modify the unspent output location.
#[cfg(any(test, feature = "proptest-impl"))]
#[allow(dead_code)]
pub fn transaction_location_mut(&mut self) -> &mut TransactionLocation {
&mut self.transaction_location
}
}

// Transparent trait impls

/// Returns a byte representing the [`transparent::Address`] variant.
Expand Down Expand Up @@ -547,3 +637,34 @@ impl FromDisk for AddressUnspentOutput {
AddressUnspentOutput::new(address_location, unspent_output_location)
}
}

impl IntoDisk for AddressTransaction {
    type Bytes = [u8; OUTPUT_LOCATION_DISK_BYTES + TRANSACTION_LOCATION_DISK_BYTES];

    /// Serializes the address location bytes, followed by the transaction location bytes.
    ///
    /// The result is built in a fixed-size array, so no heap allocation
    /// or runtime length conversion is needed.
    fn as_bytes(&self) -> Self::Bytes {
        let address_location_bytes: [u8; OUTPUT_LOCATION_DISK_BYTES] =
            self.address_location().as_bytes();
        let transaction_location_bytes: [u8; TRANSACTION_LOCATION_DISK_BYTES] =
            self.transaction_location().as_bytes();

        let mut bytes = [0u8; OUTPUT_LOCATION_DISK_BYTES + TRANSACTION_LOCATION_DISK_BYTES];

        // The two halves have exactly the lengths of the source arrays,
        // so `copy_from_slice` can not hit its length-mismatch panic.
        bytes[..OUTPUT_LOCATION_DISK_BYTES].copy_from_slice(&address_location_bytes);
        bytes[OUTPUT_LOCATION_DISK_BYTES..].copy_from_slice(&transaction_location_bytes);

        bytes
    }
}

impl FromDisk for AddressTransaction {
    /// Parses an [`AddressTransaction`] from the address location bytes,
    /// followed by the transaction location bytes.
    fn from_bytes(disk_bytes: impl AsRef<[u8]>) -> Self {
        let raw_bytes = disk_bytes.as_ref();

        // The first `OUTPUT_LOCATION_DISK_BYTES` bytes are the address location,
        // and the remaining bytes are the transaction location.
        let (address_part, transaction_part) = raw_bytes.split_at(OUTPUT_LOCATION_DISK_BYTES);

        AddressTransaction::new(
            AddressLocation::from_bytes(address_part),
            TransactionLocation::from_bytes(transaction_part),
        )
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -432,15 +432,15 @@ fn snapshot_block_and_transaction_data(state: &FinalizedState) {

/// Snapshot transparent address data, using `cargo insta` and RON serialization.
fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
// TODO: transactions for each address (#3951)

let balance_by_transparent_addr = state.cf_handle("balance_by_transparent_addr").unwrap();
let utxo_loc_by_transparent_addr_loc =
state.cf_handle("utxo_loc_by_transparent_addr_loc").unwrap();
let tx_loc_by_transparent_addr_loc = state.cf_handle("tx_loc_by_transparent_addr_loc").unwrap();

let mut stored_address_balances = Vec::new();
let mut stored_address_utxo_locations = Vec::new();
let mut stored_address_utxos = Vec::new();
let mut stored_address_transaction_locations = Vec::new();

// Correctness: Multi-key iteration causes hangs in concurrent code, but seems ok in tests.
let addresses =
Expand All @@ -451,6 +451,12 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
rocksdb::IteratorMode::Start,
)
.count();
let transaction_address_location_count = state
.full_iterator_cf(
&tx_loc_by_transparent_addr_loc,
rocksdb::IteratorMode::Start,
)
.count();

let addresses: Vec<transparent::Address> = addresses
.map(|(key, _value)| transparent::Address::from_bytes(key))
Expand All @@ -463,6 +469,7 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
if height == 0 {
assert_eq!(addresses.len(), 0);
assert_eq!(utxo_address_location_count, 0);
assert_eq!(transaction_address_location_count, 0);
return;
}

Expand All @@ -487,21 +494,37 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
stored_utxos.push(utxo);
}

let mut stored_transaction_locations = Vec::new();
for transaction_location in state.address_transaction_locations(stored_address_location) {
assert_eq!(
transaction_location.address_location(),
stored_address_location
);

stored_transaction_locations.push(transaction_location.transaction_location());
}

// Check that the lists are in chain order
//
// TODO: check that the transaction list is in chain order (#3951)
assert!(
is_sorted(&stored_utxo_locations),
"unsorted: {:?}\n\
for address: {:?}",
stored_utxo_locations,
address,
);
assert!(
is_sorted(&stored_transaction_locations),
"unsorted: {:?}\n\
for address: {:?}",
stored_transaction_locations,
address,
);

// The default raw data serialization is very verbose, so we hex-encode the bytes.
stored_address_balances.push((address.to_string(), stored_address_balance_location));
stored_address_utxo_locations.push((stored_address_location, stored_utxo_locations));
stored_address_utxos.push((address, stored_utxos));
stored_address_transaction_locations.push((address, stored_transaction_locations));
}

// We want to snapshot the order in the database,
Expand All @@ -511,6 +534,10 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
// TODO: change these names to address_utxo_locations and address_utxos
insta::assert_ron_snapshot!("address_utxos", stored_address_utxo_locations);
insta::assert_ron_snapshot!("address_utxo_data", stored_address_utxos);
insta::assert_ron_snapshot!(
"address_transaction_locations",
stored_address_transaction_locations
);
}

/// Return true if `list` is sorted in ascending order.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
]),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
TransactionLocation(
height: Height(2),
index: TransactionIndex(0),
),
]),
]
Loading

0 comments on commit 53a4299

Please sign in to comment.