From 7bd8f27e5171f37da3aa1d6c6abb06b9a291fbbf Mon Sep 17 00:00:00 2001
From: Alex Ostrovski
Date: Thu, 20 Jun 2024 13:58:46 +0300
Subject: [PATCH] fix(pruning): Check pruning in metadata calculator (#2286)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## What ❔

Adds checks for the case when the metadata calculator cannot proceed because of pruning.

## Why ❔

While we don't officially support a distributed setup for ENs, it still looks worthwhile to produce intelligible error messages in case the metadata calculator gets stuck.

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog entries from PRs).
- [x] Tests for the changes have been added / updated.
- [x] Code has been formatted via `zk fmt` and `zk lint`.
- [x] Spellcheck has been run via `zk spellcheck`.
---
 .../metadata_calculator/src/recovery/mod.rs  |  4 +-
 core/node/metadata_calculator/src/tests.rs   | 40 +++++++++++++++++++
 core/node/metadata_calculator/src/updater.rs | 15 +++++++
 3 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/core/node/metadata_calculator/src/recovery/mod.rs b/core/node/metadata_calculator/src/recovery/mod.rs
index b4e91bf720e..4aee14c0c79 100644
--- a/core/node/metadata_calculator/src/recovery/mod.rs
+++ b/core/node/metadata_calculator/src/recovery/mod.rs
@@ -279,7 +279,9 @@ impl AsyncTreeRecovery {
         let actual_root_hash = tree.root_hash().await;
         anyhow::ensure!(
             actual_root_hash == snapshot.expected_root_hash,
-            "Root hash of recovered tree {actual_root_hash:?} differs from expected root hash {:?}",
+            "Root hash of recovered tree {actual_root_hash:?} differs from expected root hash {:?}. \
+             If pruning is enabled and the tree is initialized some time after node recovery, \
+             this is caused by snapshot storage logs getting pruned; this setup is currently not supported",
             snapshot.expected_root_hash
         );
         let tree = tree.finalize().await?;
diff --git a/core/node/metadata_calculator/src/tests.rs b/core/node/metadata_calculator/src/tests.rs
index fbdfe6cab32..38e1a09d109 100644
--- a/core/node/metadata_calculator/src/tests.rs
+++ b/core/node/metadata_calculator/src/tests.rs
@@ -360,6 +360,46 @@ async fn multi_l1_batch_workflow() {
     }
 }
 
+#[tokio::test]
+async fn error_on_pruned_next_l1_batch() {
+    let pool = ConnectionPool::<Core>::test_pool().await;
+    let temp_dir = TempDir::new().expect("failed to get temporary directory for RocksDB");
+    let (calculator, _) = setup_calculator(temp_dir.path(), pool.clone()).await;
+    reset_db_state(&pool, 1).await;
+    run_calculator(calculator).await;
+
+    // Add some new blocks to the storage and mock their partial pruning.
+    let mut storage = pool.connection().await.unwrap();
+    let new_logs = gen_storage_logs(100..200, 10);
+    extend_db_state(&mut storage, new_logs).await;
+    storage
+        .pruning_dal()
+        .soft_prune_batches_range(L1BatchNumber(5), L2BlockNumber(5))
+        .await
+        .unwrap();
+    storage
+        .pruning_dal()
+        .hard_prune_batches_range(L1BatchNumber(5), L2BlockNumber(5))
+        .await
+        .unwrap();
+    // Sanity check: there should be no pruned batch headers.
+    let next_l1_batch_header = storage
+        .blocks_dal()
+        .get_l1_batch_header(L1BatchNumber(2))
+        .await
+        .unwrap();
+    assert!(next_l1_batch_header.is_none());
+
+    let (calculator, _) = setup_calculator(temp_dir.path(), pool.clone()).await;
+    let (_stop_sender, stop_receiver) = watch::channel(false);
+    let err = calculator.run(stop_receiver).await.unwrap_err();
+    let err = format!("{err:#}");
+    assert!(
+        err.contains("L1 batch #2, next to be processed by the tree, is pruned"),
+        "{err}"
+    );
+}
+
 #[tokio::test]
 async fn running_metadata_calculator_with_additional_blocks() {
     let pool = ConnectionPool::<Core>::test_pool().await;
diff --git a/core/node/metadata_calculator/src/updater.rs b/core/node/metadata_calculator/src/updater.rs
index 8271865199a..2056b831566 100644
--- a/core/node/metadata_calculator/src/updater.rs
+++ b/core/node/metadata_calculator/src/updater.rs
@@ -103,6 +103,7 @@ impl TreeUpdater {
         for l1_batch_number in l1_batch_numbers {
             let l1_batch_number = L1BatchNumber(l1_batch_number);
             let Some(current_l1_batch_data) = l1_batch_data else {
+                Self::ensure_not_pruned(storage, l1_batch_number).await?;
                 return Ok(l1_batch_number);
             };
             total_logs += current_l1_batch_data.storage_logs.len();
@@ -167,6 +168,20 @@ impl TreeUpdater {
         Ok(last_l1_batch_number + 1)
     }
 
+    /// Checks whether the requested L1 batch was pruned. Right now, the tree cannot recover from this situation,
+    /// so we exit with an error if this happens.
+    async fn ensure_not_pruned(
+        storage: &mut Connection<'_, Core>,
+        l1_batch_number: L1BatchNumber,
+    ) -> anyhow::Result<()> {
+        let pruning_info = storage.pruning_dal().get_pruning_info().await?;
+        anyhow::ensure!(
+            Some(l1_batch_number) > pruning_info.last_soft_pruned_l1_batch,
+            "L1 batch #{l1_batch_number}, next to be processed by the tree, is pruned; the tree cannot continue operating"
+        );
+        Ok(())
+    }
+
     async fn step(
         &mut self,
         mut storage: Connection<'_, Core>,
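Review note: to make the new check easy to reason about in isolation, below is a minimal, self-contained Rust sketch of the predicate that `ensure_not_pruned` applies. The `L1BatchNumber` and `PruningInfo` types here are simplified stand-ins for the real zksync-era types (the real `PruningInfo` comes from a DB query via `pruning_dal().get_pruning_info()` and has more fields); only the `Option` comparison mirrors the patched code.

```rust
use anyhow::ensure;

/// Simplified stand-in for the real `L1BatchNumber` newtype.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
struct L1BatchNumber(u32);

/// Simplified stand-in for the DAL's pruning info; `None` means nothing was pruned yet.
#[derive(Default)]
struct PruningInfo {
    last_soft_pruned_l1_batch: Option<L1BatchNumber>,
}

/// Same predicate as in the patch, minus the DB access: a batch is processable
/// iff it lies strictly above the last soft-pruned batch. Since `Some(x) > None`
/// holds for any `x`, a node that has never pruned always passes the check.
fn ensure_not_pruned(info: &PruningInfo, l1_batch_number: L1BatchNumber) -> anyhow::Result<()> {
    ensure!(
        Some(l1_batch_number) > info.last_soft_pruned_l1_batch,
        "L1 batch #{}, next to be processed by the tree, is pruned; the tree cannot continue operating",
        l1_batch_number.0
    );
    Ok(())
}

fn main() -> anyhow::Result<()> {
    // No pruning yet: any batch passes.
    ensure_not_pruned(&PruningInfo::default(), L1BatchNumber(1))?;

    // Batches up to #5 are soft-pruned, as in the new test: #6 passes, #2 errors.
    let info = PruningInfo {
        last_soft_pruned_l1_batch: Some(L1BatchNumber(5)),
    };
    ensure_not_pruned(&info, L1BatchNumber(6))?;
    assert!(ensure_not_pruned(&info, L1BatchNumber(2)).is_err());
    Ok(())
}
```

Comparing against the *soft*-pruned boundary (rather than the hard-pruned one) appears to be a deliberate fail-fast choice: soft pruning precedes hard pruning, so the updater rejects a batch as soon as it is scheduled for pruning instead of racing the hard prune for the underlying data.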