From 28661b39b272b9190860f98b1ce064c6d5df0eef Mon Sep 17 00:00:00 2001 From: cchudant Date: Thu, 26 Sep 2024 05:46:34 +0000 Subject: [PATCH] refactor: fix iterator db loading, replace preload_nodes with iterator --- src/bonsai_database.rs | 2 +- src/databases/rocks_db.rs | 6 ++ src/tests/proptest.rs | 28 ++++++- src/trie/iterator.rs | 32 ++++++-- src/trie/tree.rs | 169 ++++++++------------------------------ 5 files changed, 92 insertions(+), 145 deletions(-) diff --git a/src/bonsai_database.rs b/src/bonsai_database.rs index 7118774..592cb8d 100644 --- a/src/bonsai_database.rs +++ b/src/bonsai_database.rs @@ -27,7 +27,7 @@ pub trait DBError: Error + Send + Sync {} pub trait DBError: Send + Sync {} /// Trait to be implemented on any type that can be used as a database. -pub trait BonsaiDatabase { +pub trait BonsaiDatabase: core::fmt::Debug { type Batch: Default; #[cfg(feature = "std")] type DatabaseError: Error + DBError; diff --git a/src/databases/rocks_db.rs b/src/databases/rocks_db.rs index 1e2f176..92350c7 100644 --- a/src/databases/rocks_db.rs +++ b/src/databases/rocks_db.rs @@ -173,6 +173,12 @@ pub struct RocksDBTransaction<'a> { column_families: HashMap>, } +impl<'a> fmt::Debug for RocksDBTransaction<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("RocksDBTransaction").finish() + } +} + impl<'db, ID> BonsaiDatabase for RocksDB<'db, ID> where ID: Id, diff --git a/src/tests/proptest.rs b/src/tests/proptest.rs index 869b828..6ce724c 100644 --- a/src/tests/proptest.rs +++ b/src/tests/proptest.rs @@ -4,9 +4,9 @@ use crate::id::BasicId; use crate::key_value_db::KeyValueDB; use crate::trie::tree::MerkleTree; use crate::{BitVec, HashMap}; -use core::fmt::{self, Debug}; use bitvec::bitvec; use bitvec::order::Msb0; +use core::fmt::{self, Debug}; use proptest::prelude::*; use proptest_derive::Arbitrary; use smallvec::smallvec; @@ -121,7 +121,7 @@ impl MerkleTreeInsertProblem { } proptest::proptest! { - #![proptest_config(ProptestConfig::with_cases(5))] // comment this when developing, this is mostly for faster ci & whole workspace `cargo test` + // #![proptest_config(ProptestConfig::with_cases(5))] // comment this when developing, this is mostly for faster ci & whole workspace `cargo test` #[test] fn proptest_inserts(pb in any::()) { let _ = env_logger::builder().is_test(true).try_init(); @@ -275,3 +275,27 @@ fn test_merkle_pb_6() { pb.check(); } + +#[test] +fn test_merkle_pb_7() { + use Step::*; + let _ = env_logger::builder().is_test(true).try_init(); + log::set_max_level(log::LevelFilter::Trace); + let pb = MerkleTreeInsertProblem(vec![ + Insert( + Key(bitvec![u8, Msb0; 0,0,0,0,0]), + Value(Felt::from_hex("0x20").unwrap()), + ), + Insert( + Key(bitvec![u8, Msb0; 1,0,0,0,0]), + Value(Felt::from_hex("0x20").unwrap()), + ), + Commit, + Insert( + Key(bitvec![u8, Msb0; 0,0,0,0,0]), + Value(Felt::from_hex("0x40").unwrap()), + ), + ]); + + pb.check(); +} diff --git a/src/trie/iterator.rs b/src/trie/iterator.rs index 3f91a06..c0eb73e 100644 --- a/src/trie/iterator.rs +++ b/src/trie/iterator.rs @@ -6,7 +6,7 @@ use super::{ TrieKey, }; use crate::{ - id::Id, key_value_db::KeyValueDB, BitSlice, BonsaiDatabase, BonsaiStorageError, ByteVec, + id::Id, key_value_db::KeyValueDB, trie::tree::NodesMapping, BitSlice, BonsaiDatabase, BonsaiStorageError, ByteVec }; use core::fmt; use starknet_types_core::hash::StarkHash; @@ -42,6 +42,7 @@ impl<'a, H: StarkHash, DB: BonsaiDatabase, ID: Id> MerkleTreeIterator<'a, H, DB, } } + #[cfg(test)] pub fn cur_nodes(&self) -> Vec { self.cur_path_nodes_heights .iter() @@ -164,7 +165,7 @@ impl<'a, H: StarkHash, DB: BonsaiDatabase, ID: Id> MerkleTreeIterator<'a, H, DB, let path: ByteVec = self.current_path.clone().into(); log::trace!("Visiting db node {:?}", self.current_path); let key = TrieKey::new(&self.tree.identifier, TrieKeyType::Trie, &path); - let Some((node_id, node)) = self.tree.node_storage.nodes.load_db_node( + let Some((node_id, node)) = NodesMapping::load_db_node_get_id( &mut self.tree.node_storage.latest_node_id, &self.tree.death_row, &self.db, @@ -176,6 +177,8 @@ impl<'a, H: StarkHash, DB: BonsaiDatabase, ID: Id> MerkleTreeIterator<'a, H, DB, "Could not get node from db".to_string(), )); }; + *next_to_visit = NodeHandle::InMemory(node_id); + let node = self.tree.node_storage.nodes.load_db_node_to_id::(node_id, node)?; (node_id, node) } NodeHandle::InMemory(node_id) => { @@ -291,17 +294,26 @@ mod tests { // from scratch, should find the leaf iter.seek_to(&bits![u8, Msb0; 0,0,0,1,0,0,0,0]).unwrap(); assert_eq!(iter.current_value, Some(NodeHandle::Hash(ONE))); - assert_eq!(iter.cur_nodes(), vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(2)]); + assert_eq!( + iter.cur_nodes(), + vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(2)] + ); println!("{iter:?}"); // from a closeby leaf, should backtrack and find the next one iter.seek_to(&bits![u8, Msb0; 0,0,0,1,0,0,0,1]).unwrap(); assert_eq!(iter.current_value, Some(NodeHandle::Hash(TWO))); - assert_eq!(iter.cur_nodes(), vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(2)]); + assert_eq!( + iter.cur_nodes(), + vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(2)] + ); println!("{iter:?}"); // backtrack farther, should find the leaf iter.seek_to(&bits![u8, Msb0; 0,0,0,1,0,0,1,0]).unwrap(); assert_eq!(iter.current_value, Some(NodeHandle::Hash(THREE))); - assert_eq!(iter.cur_nodes(), vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(3)]); + assert_eq!( + iter.cur_nodes(), + vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(3)] + ); println!("{iter:?}"); // backtrack farther, should find the leaf iter.seek_to(&bits![u8, Msb0; 0,1,0,0,0,0,0,0]).unwrap(); @@ -312,7 +324,10 @@ mod tests { // similar case iter.seek_to(&bits![u8, Msb0; 0,0,0,1,0,0,0,1]).unwrap(); assert_eq!(iter.current_value, Some(NodeHandle::Hash(TWO))); - assert_eq!(iter.cur_nodes(), vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(2)]); + assert_eq!( + iter.cur_nodes(), + vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4), NodeId(2)] + ); println!("{iter:?}"); // SEEK MIDWAY INTO THE TREE @@ -340,7 +355,10 @@ mod tests { // The current value should reflect the binary node assert_eq!(iter.current_value, Some(NodeHandle::InMemory(NodeId(2)))); assert_eq!(iter.current_path.0, bits![u8, Msb0; 0,0,0,1,0,0,0]); - assert_eq!(iter.cur_nodes(), vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4)]); + assert_eq!( + iter.cur_nodes(), + vec![NodeId(1), NodeId(7), NodeId(6), NodeId(4)] + ); println!("{iter:?}"); // jump to the end of an edge diff --git a/src/trie/tree.rs b/src/trie/tree.rs index a813bd2..7253147 100644 --- a/src/trie/tree.rs +++ b/src/trie/tree.rs @@ -70,7 +70,7 @@ impl NodesMapping { // funky thinggy to make borrow checker happy root_node @ None => { // load the node - let node = self.load_db_node( + let node = Self::load_db_node_get_id( latest_node_id, death_row, db, @@ -79,6 +79,7 @@ impl NodesMapping { match node { Some((id, n)) => { + let n = self.load_db_node_to_id::(id, n)?; *root_node = Some(RootHandle::Loaded(id)); Ok(Some((id, n))) } @@ -91,29 +92,41 @@ impl NodesMapping { } } - pub(crate) fn load_db_node<'a, DB: BonsaiDatabase, ID: Id>( + /// Two phase init: first get a new slot, which does not borrow into the node storage + /// Then, set the node at that target. + /// This allows to update the parent node pointer in the first step, which cannot be done in the second + /// step without having to drop the &mut Node because it borrows into the node storage. The alternative + /// would involve a double-lookup. + pub(crate) fn load_db_node_to_id<'a, DB: BonsaiDatabase>( &'a mut self, + target_id: NodeId, + db_node: Node, + ) -> Result<&'a mut Node, BonsaiStorageError> { + // Insert and return reference at the same time. Entry occupied case should not be possible. + match self.0.entry(target_id) { + hash_map::Entry::Occupied(_) => Err(BonsaiStorageError::Trie( + "Duplicate node id in storage".to_string(), + )), + hash_map::Entry::Vacant(entry) => Ok(entry.insert(db_node)), + } + } + + /// First step of two phase init. + pub(crate) fn load_db_node_get_id<'a, DB: BonsaiDatabase, ID: Id>( latest_node_id: &mut NodeId, death_row: &HashSet, db: &KeyValueDB, key: &TrieKey, - ) -> Result, BonsaiStorageError> { + ) -> Result, BonsaiStorageError> { if death_row.contains(key) { return Ok(None); } - let node = db.get(key)?; let Some(node) = node else { return Ok(None) }; let node = Node::decode(&mut node.as_slice())?; let node_id = latest_node_id.next_id(); - // Insert and return reference at the same time. Entry occupied case should not be possible. - match self.0.entry(node_id) { - hash_map::Entry::Occupied(_) => Err(BonsaiStorageError::Trie( - "duplicate node id in storage".to_string(), - )), - hash_map::Entry::Vacant(entry) => Ok(Some((node_id, entry.insert(node)))), - } + Ok(Some((node_id, node))) } } @@ -556,12 +569,11 @@ impl MerkleTree { return Ok(()); } } + let mut iter = self.iter(db); iter.seek_to(key)?; - log::trace!("iter={iter:?}"); - let path_nodes_ = iter.cur_path_nodes_heights.into_iter().map(|n| n.0).collect::>(); - let (path_nodes, _path) = self.preload_nodes(db, key)?; - assert_eq!(path_nodes_, path_nodes); + let path_nodes = iter.cur_path_nodes_heights; + // There are three possibilities. // // 1. The leaf exists, in which case we simply change its value. @@ -582,7 +594,7 @@ impl MerkleTree { log::trace!("preload nodes: {:?}", path_nodes); use Node::*; match path_nodes.last() { - Some(node_id) => { + Some((node_id, _)) => { let mut nodes_to_add = Vec::new(); self.node_storage .nodes @@ -771,12 +783,14 @@ impl MerkleTree { } leaf_entry.insert(InsertOrRemove::Remove); - let (path_nodes, _path) = self.preload_nodes(db, key)?; + let mut iter = self.iter(db); + iter.seek_to(key)?; + let path_nodes = iter.cur_path_nodes_heights; let mut last_binary_path = Path(key.to_bitvec()); // Go backwards until we hit a branch node. - let mut node_iter = path_nodes.into_iter().rev().skip_while(|node| { + let mut node_iter = path_nodes.into_iter().rev().skip_while(|(node, _)| { let node = match self.node_storage.nodes.0.entry(*node) { hash_map::Entry::Occupied(entry) => entry, // SAFETY: Has been populate by preload_nodes just above @@ -816,7 +830,7 @@ impl MerkleTree { ); match branch_node { - Some(node_id) => { + Some((node_id, _)) => { let (new_edge, par_path) = { let node = self.node_storage.nodes.0.get_mut(&node_id).ok_or( BonsaiStorageError::Trie("Node not found in memory".to_string()), @@ -848,7 +862,7 @@ impl MerkleTree { (edge, cl) }; // Check the parent of the new edge. If it is also an edge, then they must merge. - if let Some(parent_node_id) = parent_branch_node { + if let Some((parent_node_id, _)) = parent_branch_node { // Get a mutable reference to the parent node to merge them let parent_node = self.node_storage.nodes.0.get_mut(&parent_node_id).ok_or( BonsaiStorageError::Trie("Node not found in memory".to_string()), @@ -954,121 +968,6 @@ impl MerkleTree { db.contains(&TrieKey::new(&self.identifier, TrieKeyType::Flat, &key)) } - /// preload_nodes from the current root towards the destination [Leaf](Node::Leaf) node. - /// If the destination node exists, it will be the final node in the list. - /// - /// This means that the final node will always be either a the destination [Leaf](Node::Leaf) - /// node, or an [Edge](Node::Edge) node who's path suffix does not match the leaf's path. - /// - /// The final node can __not__ be a [Binary](Node::Binary) node since it would always be - /// possible to continue on towards the destination. Nor can it be an - /// [Unresolved](Node::Unresolved) node since this would be resolved to check if we can - /// travel further. - /// - /// # Arguments - /// - /// * `dst` - The node to get to. - /// - /// # Returns - /// - /// The list of nodes along the path. - fn preload_nodes( - &mut self, - db: &KeyValueDB, - dst: &BitSlice, - ) -> Result<(Vec, Path), BonsaiStorageError> { - let mut nodes = Vec::with_capacity(251); - let mut path = Path::default(); - - let mut prev_handle = None::<&mut NodeHandle>; // None signals tree root - - loop { - // get node from cache or database - let (node_id, node) = match prev_handle { - // tree root - None => { - match self.node_storage.nodes.get_root_node( - &mut self.node_storage.root_node, - &mut self.node_storage.latest_node_id, - &self.death_row, - &self.identifier, - db, - )? { - Some((node_id, node)) => (node_id, node), - None => { - // empty tree - return Ok((nodes, path)); - } - } - } - // not tree root - Some(prev_handle) => match prev_handle { - NodeHandle::Hash(_) => { - // load from db - let Some(node) = Self::get_trie_branch_in_db_from_path( - &self.death_row, - &self.identifier, - db, - &path, - )? - else { - // end of path traversal - break; - }; - - // put it in inmemory storage - self.node_storage.latest_node_id.next_id(); - *prev_handle = NodeHandle::InMemory(self.node_storage.latest_node_id); - let node = self - .node_storage - .nodes - .0 - .entry(self.node_storage.latest_node_id) - .insert(node); - - (self.node_storage.latest_node_id, node.into_mut()) - } - NodeHandle::InMemory(node_id) => { - let node_id = *node_id; - - let node = self.node_storage.nodes.0.get_mut(&node_id).ok_or( - BonsaiStorageError::Trie( - "Couldn't get node from temp storage".to_string(), - ), - )?; - - (node_id, node) - } - }, - }; - - nodes.push(node_id); - - // visit the child - match node { - Node::Binary(binary_node) => { - let next_direction = binary_node.direction(dst); - path.0.push(bool::from(next_direction)); - prev_handle = Some(binary_node.get_child_mut(next_direction)); - } - - Node::Edge(edge_node) if edge_node.path_matches(dst) => { - path.0.extend_from_bitslice(&edge_node.path.0); - if path.0 == dst { - break; // found it :) - } - - prev_handle = Some(&mut edge_node.child); - } - - // We are in a case where the edge node doesn't match the path we want to preload so we return nothing. - Node::Edge(_) => break, - } - } - - Ok((nodes, path)) - } - /// Get the node of the trie that corresponds to the path. fn get_trie_branch_in_db_from_path( death_row: &HashSet,