Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(scan): Add raw database format snapshots to the scanner #8075

Merged
merged 15 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5803,8 +5803,11 @@ dependencies = [
"ff",
"group",
"indexmap 2.1.0",
"insta",
"itertools 0.12.0",
"jubjub",
"proptest",
"proptest-derive",
"rand 0.8.5",
"semver 1.0.20",
"serde",
Expand Down
33 changes: 31 additions & 2 deletions zebra-scan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,21 @@ categories = ["cryptography::cryptocurrencies"]

# Production features that activate extra dependencies, or extra features in dependencies

# Test features

proptest-impl = [
"proptest",
"proptest-derive",
"zebra-state/proptest-impl",
"zebra-chain/proptest-impl",
"bls12_381",
"ff",
"group",
"jubjub",
"rand",
"zcash_note_encryption",
]

[dependencies]

color-eyre = "0.6.2"
Expand All @@ -37,15 +52,29 @@ zebra-state = { path = "../zebra-state", version = "1.0.0-beta.31", features = [

chrono = { version = "0.4.31", default-features = false, features = ["clock", "std", "serde"] }

# test feature proptest-impl
proptest = { version = "1.4.0", optional = true }
proptest-derive = { version = "0.4.0", optional = true }

bls12_381 = { version = "0.8.0", optional = true }
ff = { version = "0.13.0", optional = true }
group = { version = "0.13.0", optional = true }
jubjub = { version = "0.10.0", optional = true }
rand = { version = "0.8.5", optional = true }
zcash_note_encryption = { version = "0.4.0", optional = true }

[dev-dependencies]

insta = { version = "1.33.0", features = ["ron", "redactions"] }
tokio = { version = "1.34.0", features = ["test-util"] }

proptest = "1.4.0"
proptest-derive = "0.4.0"
bls12_381 = "0.8.0"
ff = "0.13.0"
group = "0.13.0"
jubjub = "0.10.0"
rand = "0.8.5"
tokio = { version = "1.34.0", features = ["test-util"] }

zcash_note_encryption = "0.4.0"

zebra-state = { path = "../zebra-state", version = "1.0.0-beta.31", features = ["proptest-impl"] }
Expand Down
4 changes: 2 additions & 2 deletions zebra-scan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ pub mod init;
pub mod scan;
pub mod storage;

#[cfg(test)]
mod tests;
#[cfg(any(test, feature = "proptest-impl"))]
pub mod tests;

pub use config::Config;
pub use init::{init, spawn_init};
6 changes: 3 additions & 3 deletions zebra-scan/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ pub async fn scan_height_and_store_results(
let dfvk_res = scanned_block_to_db_result(dfvk_res);
let ivk_res = scanned_block_to_db_result(ivk_res);

storage.add_sapling_results(sapling_key.clone(), height, dfvk_res);
storage.add_sapling_results(sapling_key, height, ivk_res);
storage.add_sapling_results(&sapling_key, height, dfvk_res);
storage.add_sapling_results(&sapling_key, height, ivk_res);

Ok::<_, Report>(())
})
Expand Down Expand Up @@ -398,7 +398,7 @@ fn scanned_block_to_db_result<Nf>(
.map(|tx| {
(
TransactionIndex::from_usize(tx.index),
SaplingScannedResult::from(tx.txid.as_ref()),
SaplingScannedResult::from_bytes_in_display_order(*tx.txid.as_ref()),
)
})
.collect()
Expand Down
13 changes: 10 additions & 3 deletions zebra-scan/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,13 @@ impl Storage {
///
/// This method can block while writing database files, so it must be inside spawn_blocking()
/// in async code.
pub fn add_sapling_key(&mut self, sapling_key: &SaplingScanningKey, birthday: Option<Height>) {
pub fn add_sapling_key(
&mut self,
sapling_key: &SaplingScanningKey,
birthday: impl Into<Option<Height>>,
) {
let birthday = birthday.into();

// It's ok to write some keys and not others during shutdown, so each key can get its own
// batch. (They will be re-written on startup anyway.)
let mut batch = ScannerWriteBatch::default();
Expand All @@ -93,15 +99,16 @@ impl Storage {
self.sapling_keys_and_birthday_heights()
}

/// Add the sapling results for `height` to the storage.
/// Add the sapling results for `height` to the storage. The results can be any map of
/// [`TransactionIndex`] to [`SaplingScannedResult`].
///
/// # Performance / Hangs
///
/// This method can block while writing database files, so it must be inside spawn_blocking()
/// in async code.
pub fn add_sapling_results(
&mut self,
sapling_key: SaplingScanningKey,
sapling_key: &SaplingScanningKey,
height: Height,
sapling_results: BTreeMap<TransactionIndex, SaplingScannedResult>,
) {
Expand Down
3 changes: 3 additions & 0 deletions zebra-scan/src/storage/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ pub use zebra_state::{

pub mod sapling;

#[cfg(test)]
mod tests;

/// The directory name used to distinguish the scanner database from Zebra's other databases or
/// flat files.
///
Expand Down
4 changes: 3 additions & 1 deletion zebra-scan/src/storage/db/sapling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ impl Storage {
// Reading Sapling database entries

/// Returns the result for a specific database index (key, block height, transaction index).
/// Returns `None` if the result is missing or an empty marker for a birthday or progress
/// height.
//
// TODO: add tests for this method
pub fn sapling_result_for_index(
&self,
index: &SaplingScannedDatabaseIndex,
) -> Option<SaplingScannedResult> {
self.db.zs_get(&self.sapling_tx_ids_cf(), &index)
self.db.zs_get(&self.sapling_tx_ids_cf(), &index).flatten()
teor2345 marked this conversation as resolved.
Show resolved Hide resolved
}

/// Returns the results for a specific key and block height.
Expand Down
3 changes: 3 additions & 0 deletions zebra-scan/src/storage/db/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! General scanner database tests.

mod snapshot;
164 changes: 164 additions & 0 deletions zebra-scan/src/storage/db/tests/snapshot.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
//! Raw data snapshot tests for the scanner database format.
//!
//! These tests check:
//! - the name of each column family
//! - the number of key-value entries
//! - the bytes in each key and value
//!
//! These tests currently use fixed test vectors.
//!
//! # Fixing Test Failures
//!
//! If this test fails, run:
//! ```sh
//! cd zebra-scan
//! cargo insta test --review --features shielded-scan
//! ```
//! to update the test snapshots, then commit the `test_*.snap` files using git.
//!
//! # Snapshot Format
//!
//! These snapshots use [RON (Rusty Object Notation)](https://github.com/ron-rs/ron#readme),
//! a text format similar to Rust syntax. Raw byte data is encoded in hexadecimal.
//!
//! Due to `serde` limitations, some object types can't be represented exactly,
//! so RON uses the closest equivalent structure.

use std::{collections::BTreeMap, sync::Arc};

use zebra_chain::{
block::{Block, Height},
parameters::Network::{self, *},
serialization::ZcashDeserializeInto,
};
use zebra_state::{RawBytes, ReadDisk, TransactionIndex, KV};

use crate::{
storage::{db::ScannerDb, Storage},
tests::{FAKE_SAPLING_VIEWING_KEY, ZECPAGES_SAPLING_VIEWING_KEY},
Config,
};

/// Runs the raw RocksDB column family snapshot checks once for each network.
///
/// The snapshots include the `default` column family, even though Zebra does not use it.
#[test]
fn test_raw_rocksdb_column_families() {
    let _init_guard = zebra_test::init();

    // Mainnet is checked first, then Testnet.
    for network in [Mainnet, Testnet] {
        test_raw_rocksdb_column_families_with_network(network);
    }
}

/// Snapshots the raw column family data of a fresh scanner database for `network`.
///
/// The database is snapshotted in three stages:
/// 1. while it is still empty,
/// 2. after two Sapling keys have been added, and
/// 3. after fake results have been added for each of the heights 0, 1, and 2.
///
/// See [`test_raw_rocksdb_column_families`].
fn test_raw_rocksdb_column_families_with_network(network: Network) {
    // Lowercase network name, used to build the network-specific snapshot suffixes.
    let net_suffix = network.to_string().to_ascii_lowercase();

    // Build the key strings once, instead of at every call site.
    let zecpages_key = ZECPAGES_SAPLING_VIEWING_KEY.to_string();
    let fake_key = FAKE_SAPLING_VIEWING_KEY.to_string();

    let mut storage = Storage::new(&Config::ephemeral(), network);

    // RocksDB can return the column families in any order. We always look them up by name,
    // so sort them to get a stable snapshot.
    //
    // NOTE: insta records the asserted expression text in the snapshot file header, so this
    // variable keeps the name `cf_names`.
    let mut cf_names = storage.db.list_cf().expect("empty database is valid");
    cf_names.sort();

    // This snapshot has no network suffix, so both networks must produce the same names.
    // The per-stage data snapshots below re-check the names at every stage.
    insta::assert_ron_snapshot!("column_family_names", cf_names);

    // Stage 1: the empty database. The suffix has no network in it, so an empty database
    // must look the same on every network.
    let mut settings = insta::Settings::clone_current();
    settings.set_snapshot_suffix("empty");
    settings.bind(|| snapshot_raw_rocksdb_column_family_data(&storage.db, &cf_names));

    // Stage 2: the stored keys.
    //
    // The first key has no birthday, which is expected to be stored as the activation height.
    storage.add_sapling_key(&zecpages_key, None);
    // The second key has an explicit birthday above the activation height.
    storage.add_sapling_key(&fake_key, Height(1_000_000));

    settings.set_snapshot_suffix(format!("{net_suffix}_keys"));
    settings.bind(|| snapshot_raw_rocksdb_column_family_data(&storage.db, &cf_names));

    // Stage 3: results for genesis, block 1, and block 2 on the selected network.
    let blocks = match network {
        Mainnet => &*zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS,
        Testnet => &*zebra_test::vectors::CONTINUOUS_TESTNET_BLOCKS,
    };

    // Only a few blocks are used, because each one adds a few kilobytes of serialized data.
    for height in 0..=2 {
        let block: Arc<Block> = blocks
            .get(&height)
            .expect("block height has test data")
            .zcash_deserialize_into()
            .expect("test data deserializes");

        // Pretend that every transaction in the block is a result for the first key.
        storage.add_sapling_results(
            &zecpages_key,
            Height(height),
            block
                .transactions
                .iter()
                .enumerate()
                .map(|(tx_index, tx)| (TransactionIndex::from_usize(tx_index), tx.hash().into()))
                .collect(),
        );

        // Each height gets its own snapshot suffix.
        let mut height_settings = insta::Settings::clone_current();
        height_settings.set_snapshot_suffix(format!("{net_suffix}_{height}"));

        height_settings.bind(|| snapshot_raw_rocksdb_column_family_data(&storage.db, &cf_names));
    }
}

/// Takes a `cargo insta` RON snapshot of the raw key-value data in every column family of `db`.
///
/// `original_cf_names` is the sorted list of column family names that `db` is expected to have.
///
/// # Panics
///
/// - if the sorted column family names in `db` differ from `original_cf_names`
/// - if the `default` column family contains any entries
/// - if a snapshot assertion fails
fn snapshot_raw_rocksdb_column_family_data(db: &ScannerDb, original_cf_names: &[String]) {
    let mut current_cf_names = db.list_cf().expect("empty database is valid");
    current_cf_names.sort();

    // The set of column families must not depend on the network or the block height.
    assert_eq!(
        original_cf_names, current_cf_names,
        "unexpected extra column families",
    );

    // NOTE: insta records the asserted expression text in the snapshot file header, so the
    // `empty_column_families` and `cf_data` variables keep their names.
    let mut empty_column_families = Vec::new();

    for cf_name in original_cf_names {
        let cf_handle = db
            .cf_handle(cf_name)
            .expect("RocksDB API provides correct names");

        // Correctness: Multi-key iteration causes hangs in concurrent code, but seems ok in tests.
        let cf_items: BTreeMap<RawBytes, RawBytes> = db.zs_items_in_range_ordered(&cf_handle, ..);

        // Hex-encode the raw bytes, because the default serialization is very verbose.
        let cf_data: Vec<KV> = cf_items
            .iter()
            .map(|(key, value)| KV::new(key.raw_bytes(), value.raw_bytes()))
            .collect();

        // The default column family only gets an emptiness check, never a snapshot.
        if cf_name == "default" {
            assert_eq!(cf_data.len(), 0, "default column family is never used");
            continue;
        }

        // Record empty column families in a separate list, so they can be told apart from
        // the plain list of column family names.
        if cf_data.is_empty() {
            empty_column_families.push(format!("{cf_name}: no entries"));
            continue;
        }

        // Everything else gets a raw data snapshot named after its column family.
        insta::assert_ron_snapshot!(format!("{cf_name}_raw_data"), cf_data);
    }

    insta::assert_ron_snapshot!("empty_column_families", empty_column_families);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: cf_names
---
[
"default",
"sapling_tx_ids",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[
"sapling_tx_ids: no entries",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Loading
Loading