Add ability to reset a db to execute from genesis (MystenLabs#10013)
## Description 

This PR allows us to reset a db to execute from genesis. The main use
case is running an instance from genesis using a db snapshot. Follow the
steps below to test:

1. Get a db snapshot. Either generate one by running stress locally with
db checkpoints enabled, or download one from the S3 bucket (though it is
quite large).
2. Download the snapshot for the epoch you want to restore to the local
disk. You will find one snapshot per epoch in the S3 bucket. Place the
snapshot in the dir the config points to. If `db-path` in fullnode.yaml
is `/opt/sui/db/authorities_db` and we want to restore from epoch 10, we
copy the snapshot to `/opt/sui/db/authorities_db` like this:
```aws s3 cp s3://myBucket/dir /opt/sui/db/authorities_db/ --recursive --exclude "*" --include "epoch_10*"```
3. Mark the downloaded snapshot as live: ```mv /opt/sui/db/authorities_db/epoch_10 /opt/sui/db/authorities_db/live```
4. Reset the downloaded db to execute from genesis with: ```cargo run --package sui-tool -- db-tool --db-path /opt/sui/db/authorities_db/live reset-db``` (a verification sketch follows the sample config below)
5. Start the sui full node: ```cargo run --release --bin sui-node -- --config-path ~/db_checkpoints/fullnode.yaml```
6. A sample fullnode.yaml config would be:
```
---
db-path: /opt/sui/db/authorities_db
network-address: /ip4/0.0.0.0/tcp/8080/http
json-rpc-address: "0.0.0.0:9000"
websocket-address: "0.0.0.0:9001"
metrics-address: "0.0.0.0:9184"
admin-interface-port: 1337
enable-event-processing: true
grpc-load-shed: ~
grpc-concurrency-limit: ~
p2p-config:
  listen-address: "0.0.0.0:8084"
genesis:
  genesis-file-location: <path to genesis blob for the network>
authority-store-pruning-config:
  num-latest-epoch-dbs-to-retain: 3
  epoch-db-pruning-period-secs: 3600
  num-epochs-to-retain: 18446744073709551615
  max-checkpoints-in-batch: 200
  max-transactions-in-batch: 1000
  use-range-deletion: true
```
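
To sanity-check step 4 before restarting the node, here is a minimal sketch that opens the reset checkpoint store and confirms the executed watermark is gone. The `open_tables_read_write` signature follows the diff below; `get_highest_executed_checkpoint` is assumed to be the existing getter for the `HighestExecuted` watermark:

```rust
use std::path::PathBuf;
use sui_core::checkpoints::CheckpointStore;
use typed_store::rocks::MetricConf;

fn verify_reset() -> anyhow::Result<()> {
    // Open the same live checkpoint store that reset-db just cleared.
    let store = CheckpointStore::open_tables_read_write(
        PathBuf::from("/opt/sui/db/authorities_db/live/checkpoints"),
        MetricConf::default(),
        None,
        None,
    );
    // With the HighestExecuted watermark deleted, execution restarts at genesis.
    assert!(store.get_highest_executed_checkpoint()?.is_none());
    Ok(())
}
```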

## Test Plan 
By running it locally.
sadhansood authored Mar 30, 2023
1 parent 8059d72 commit 39dc1b8
Showing 4 changed files with 94 additions and 3 deletions.
18 changes: 18 additions & 0 deletions crates/sui-core/src/authority/authority_store_tables.rs
@@ -250,6 +250,24 @@ impl AuthorityPerpetualTables {
            .checkpoint_db(path)
            .map_err(SuiError::StorageError)
    }

    pub fn reset_db_for_execution_since_genesis(&self) -> SuiResult {
        // TODO: Add new tables that get added to the db automatically
        self.objects.clear()?;
        self.indirect_move_objects.clear()?;
        self.owned_object_transaction_locks.clear()?;
        self.executed_effects.clear()?;
        self.events.clear()?;
        self.executed_transactions_to_checkpoint.clear()?;
        self.root_state_hash_by_epoch.clear()?;
        self.epoch_start_configuration.clear()?;
        self.pruned_checkpoint.clear()?;
        self.objects
            .rocksdb
            .flush()
            .map_err(SuiError::StorageError)?;
        Ok(())
    }
}

impl ObjectStore for AuthorityPerpetualTables {
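
The clears above delete the keys; the explicit flush then persists those deletions from the memtables before the process exits. A standalone sketch of the same clear-then-flush pattern using the rocksdb crate directly (the path and key are illustrative, not Sui's schema):

```rust
use rocksdb::DB;

fn main() -> Result<(), rocksdb::Error> {
    // Throwaway db path for the demo.
    let db = DB::open_default("/tmp/reset_demo_db")?;
    db.put(b"some_key", b"some_value")?;

    // Clear the data, mirroring the per-table clear() calls above.
    db.delete(b"some_key")?;

    // Flush memtables to SST files so the deletions are durable on disk,
    // mirroring the explicit flush() at the end of the reset.
    db.flush()?;
    Ok(())
}
```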
19 changes: 19 additions & 0 deletions crates/sui-core/src/checkpoints/mod.rs
@@ -382,6 +382,25 @@ impl CheckpointStore {
            .checkpoint_db(path)
            .map_err(SuiError::StorageError)
    }

    pub fn delete_highest_executed_checkpoint_test_only(&self) -> Result<(), TypedStoreError> {
        let mut wb = self.watermarks.batch();
        wb.delete_batch(
            &self.watermarks,
            std::iter::once(CheckpointWatermark::HighestExecuted),
        )?;
        wb.write()?;
        Ok(())
    }

    pub fn reset_db_for_execution_since_genesis(&self) -> SuiResult {
        self.delete_highest_executed_checkpoint_test_only()?;
        self.watermarks
            .rocksdb
            .flush()
            .map_err(SuiError::StorageError)?;
        Ok(())
    }
}

#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
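
The watermark delete goes through a write batch, which leaves room to clear several watermarks atomically later. A sketch of that extension (the method name is hypothetical, and `HighestVerified`/`HighestSynced` are assumed variants of `CheckpointWatermark`):

```rust
// Hypothetical extension: clear all sync-related watermarks in one atomic batch,
// following the same batch API usage as delete_highest_executed_checkpoint_test_only.
pub fn delete_all_watermarks_test_only(&self) -> Result<(), TypedStoreError> {
    let mut wb = self.watermarks.batch();
    wb.delete_batch(
        &self.watermarks,
        [
            CheckpointWatermark::HighestExecuted,
            CheckpointWatermark::HighestVerified,
            CheckpointWatermark::HighestSynced,
        ],
    )?;
    wb.write()?;
    Ok(())
}
```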
56 changes: 55 additions & 1 deletion crates/sui-tool/src/db_tool/mod.rs
@@ -3,8 +3,11 @@

use self::db_dump::{dump_table, duplicate_objects_summary, list_tables, table_summary, StoreName};
use clap::Parser;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
use sui_core::authority::authority_store_tables::AuthorityPerpetualTables;
use sui_core::checkpoints::CheckpointStore;
use sui_types::base_types::EpochId;
use typed_store::rocks::MetricConf;

pub mod db_dump;

@@ -15,6 +18,7 @@ pub enum DbToolCommand {
    Dump(Dump),
    TableSummary(Dump),
    DuplicatesSummary,
    ResetDB,
}

#[derive(Parser)]
@@ -56,6 +60,7 @@ pub fn execute_db_tool_command(db_path: PathBuf, cmd: DbToolCommand) -> anyhow::
            print_db_table_summary(d.store_name, d.epoch, db_path, &d.table_name)
        }
        DbToolCommand::DuplicatesSummary => print_db_duplicates_summary(db_path),
        DbToolCommand::ResetDB => reset_db_to_genesis(&db_path),
    }
}

@@ -74,6 +79,55 @@ pub fn print_db_duplicates_summary(db_path: PathBuf) -> anyhow::Result<()> {
    Ok(())
}

pub fn reset_db_to_genesis(path: &Path) -> anyhow::Result<()> {
    // Follow the steps below to test:
    //
    // Get a db snapshot. Either generate one by running stress locally with db checkpoints enabled, or download one from the S3 bucket (though it is quite large).
    // Download the snapshot for the epoch you want to restore to the local disk. You will find one snapshot per epoch in the S3 bucket. Place the snapshot in the dir the config points to. If db-path in fullnode.yaml is /opt/sui/db/authorities_db and we want to restore from epoch 10, we copy the snapshot to /opt/sui/db/authorities_db like this:
    // aws s3 cp s3://myBucket/dir /opt/sui/db/authorities_db/ --recursive --exclude "*" --include "epoch_10*"
    // Mark downloaded snapshot as live: mv /opt/sui/db/authorities_db/epoch_10 /opt/sui/db/authorities_db/live
    // Reset the downloaded db to execute from genesis with: cargo run --package sui-tool -- db-tool --db-path /opt/sui/db/authorities_db/live reset-db
    // Start the sui full node: cargo run --release --bin sui-node -- --config-path ~/db_checkpoints/fullnode.yaml
    // A sample fullnode.yaml config would be:
    // ---
    // db-path: /opt/sui/db/authorities_db
    // network-address: /ip4/0.0.0.0/tcp/8080/http
    // json-rpc-address: "0.0.0.0:9000"
    // websocket-address: "0.0.0.0:9001"
    // metrics-address: "0.0.0.0:9184"
    // admin-interface-port: 1337
    // enable-event-processing: true
    // grpc-load-shed: ~
    // grpc-concurrency-limit: ~
    // p2p-config:
    //   listen-address: "0.0.0.0:8084"
    // genesis:
    //   genesis-file-location: <path to genesis blob for the network>
    // authority-store-pruning-config:
    //   num-latest-epoch-dbs-to-retain: 3
    //   epoch-db-pruning-period-secs: 3600
    //   num-epochs-to-retain: 18446744073709551615
    //   max-checkpoints-in-batch: 200
    //   max-transactions-in-batch: 1000
    //   use-range-deletion: true
    let perpetual_db = AuthorityPerpetualTables::open_tables_read_write(
        path.join("store").join("perpetual"),
        MetricConf::default(),
        None,
        None,
    );
    perpetual_db.reset_db_for_execution_since_genesis()?;

    let checkpoint_db = CheckpointStore::open_tables_read_write(
        path.join("checkpoints"),
        MetricConf::default(),
        None,
        None,
    );
    checkpoint_db.reset_db_for_execution_since_genesis()?;
    Ok(())
}

pub fn print_db_table_summary(
    store: StoreName,
    epoch: Option<EpochId>,
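
Note that `reset_db_to_genesis` opens both stores read-write directly on the live path, and RocksDB takes an exclusive lock on its db directory, so the node must be stopped before running `reset-db`.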
4 changes: 2 additions & 2 deletions crates/typed-store/src/rocks/mod.rs
@@ -377,8 +377,8 @@ impl RocksDB {
        delegate_call!(self.compact_range_cf(cf, start, end))
    }

-    pub fn flush(&self) -> Result<(), rocksdb::Error> {
-        delegate_call!(self.flush())
+    pub fn flush(&self) -> Result<(), TypedStoreError> {
+        delegate_call!(self.flush()).map_err(|e| TypedStoreError::RocksDBError(e.into_string()))
    }

    pub fn checkpoint(&self, path: &Path) -> Result<(), TypedStoreError> {
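
With `flush()` now returning `TypedStoreError`, call sites can funnel storage failures into `SuiError` with a single `map_err`, which is exactly what the new reset functions above do. An illustrative caller (the helper name is hypothetical):

```rust
// Hypothetical helper mirroring the call-site pattern of the reset functions:
// TypedStoreError maps straight into SuiError::StorageError.
fn flush_perpetual(tables: &AuthorityPerpetualTables) -> SuiResult {
    tables
        .objects
        .rocksdb
        .flush()
        .map_err(SuiError::StorageError)
}
```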
