Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: State sync from local filesystem #8913

Merged
merged 24 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@

### Non-protocol Changes

* Node can sync State from S3. [#8789](https://github.com/near/nearcore/pull/8789)
* The contract runtime switched to using our fork of wasmer, with various improvements.
* undo-block tool to reset the chain head from current head to its prev block. Use the tool by running: `./target/release/neard --home {path_to_config_directory} undo-block`. [#8681](https://github.com/near/nearcore/pull/8681)
* Node can sync State from S3. [#8789](https://github.com/near/nearcore/pull/8789)
* Node can sync State from local filesystem. [#8789](https://github.com/near/nearcore/pull/8789)
* Add per shard granularity for chunks in validator info metric. [#8934](https://github.com/near/nearcore/pull/8934)

## 1.33.0
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 20 additions & 1 deletion chain/chain/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4521,8 +4521,27 @@ impl Chain {
{
let prev_hash = *sync_block.header().prev_hash();
// If sync_hash is not on the Epoch boundary, it's malicious behavior
Ok(self.runtime_adapter.is_next_block_epoch_start(&prev_hash)?)
let is_next_block_epoch_start =
self.runtime_adapter.is_next_block_epoch_start(&prev_hash)?;

tracing::info!(
target: "chain",
?sync_hash,
head_epoch_id = ?head.epoch_id,
sync_block_epoch_id = ?sync_block.header().epoch_id(),
sync_block_next_epoch_id = ?sync_block.header().next_epoch_id(),
?prev_hash,
is_next_block_epoch_start,
"Failed check_sync_hash_validity, possible malicious behavior");
nikurt marked this conversation as resolved.
Show resolved Hide resolved
Ok(is_next_block_epoch_start)
} else {
tracing::info!(
target: "chain",
?sync_hash,
head_epoch_id = ?head.epoch_id,
sync_block_epoch_id = ?sync_block.header().epoch_id(),
sync_block_next_epoch_id = ?sync_block.header().next_epoch_id(),
"Failed check_sync_hash_validity");
Ok(false) // invalid Epoch of sync_hash, possible malicious behavior
}
}
Expand Down
4 changes: 2 additions & 2 deletions chain/client-primitives/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ pub enum AccountOrPeerIdOrHash {
AccountId(AccountId),
PeerId(PeerId),
Hash(CryptoHash),
ExternalStorage,
}

#[derive(Debug, serde::Serialize)]
Expand All @@ -63,7 +62,8 @@ pub struct DownloadStatus {
pub state_requests_count: u64,
pub last_target: Option<AccountOrPeerIdOrHash>,
#[serde(skip_serializing, skip_deserializing)]
pub response: Arc<Mutex<Option<Result<(u16, Vec<u8>), String>>>>,
// Use type `String` as an error to avoid a dependency on the `rust-s3` or `anyhow` crates.
pub response: Arc<Mutex<Option<Result<Vec<u8>, String>>>>,
ppca marked this conversation as resolved.
Show resolved Hide resolved
}

impl DownloadStatus {
Expand Down
22 changes: 12 additions & 10 deletions chain/client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ edition.workspace = true
actix-rt.workspace = true
actix.workspace = true
ansi_term.workspace = true
anyhow.workspace = true
async-trait.workspace = true
borsh.workspace = true
chrono.workspace = true
Expand All @@ -20,6 +21,7 @@ num-rational.workspace = true
once_cell.workspace = true
rand.workspace = true
reed-solomon-erasure.workspace = true
regex.workspace = true
rust-s3.workspace = true
serde_json.workspace = true
strum.workspace = true
Expand All @@ -28,24 +30,24 @@ thiserror.workspace = true
tokio.workspace = true
tracing.workspace = true

delay-detector.workspace = true
near-async.workspace = true
near-chain-primitives.workspace = true
near-crypto.workspace = true
near-primitives.workspace = true
near-store.workspace = true
near-chain-configs.workspace = true
near-chain-primitives.workspace = true
near-chain.workspace = true
near-chunks.workspace = true
near-client-primitives.workspace = true
near-crypto.workspace = true
near-dyn-configs.workspace = true
near-epoch-manager.workspace = true
near-network.workspace = true
near-pool.workspace = true
near-chunks.workspace = true
near-telemetry.workspace = true
near-o11y.workspace = true
near-performance-metrics.workspace = true
near-performance-metrics-macros.workspace = true
near-epoch-manager.workspace = true
delay-detector.workspace = true
near-performance-metrics.workspace = true
near-pool.workspace = true
near-primitives.workspace = true
near-store.workspace = true
near-telemetry.workspace = true

[dev-dependencies]
assert_matches.workspace = true
Expand Down
10 changes: 2 additions & 8 deletions chain/client/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,7 @@ impl Client {
network_adapter.clone(),
config.state_sync_timeout,
&config.chain_id,
config.state_sync_from_s3_enabled,
&config.state_sync_s3_bucket,
&config.state_sync_s3_region,
config.state_sync_num_concurrent_s3_requests,
&config.state_sync.sync,
);
let num_block_producer_seats = config.num_block_producer_seats as usize;
let data_parts = runtime_adapter.num_data_parts();
Expand Down Expand Up @@ -2133,10 +2130,7 @@ impl Client {
network_adapter1,
state_sync_timeout,
&self.config.chain_id,
self.config.state_sync_from_s3_enabled,
&self.config.state_sync_s3_bucket,
&self.config.state_sync_s3_region,
self.config.state_sync_num_concurrent_s3_requests,
&self.config.state_sync.sync,
),
new_shard_sync,
BlocksCatchUpState::new(sync_hash, epoch_id),
Expand Down
6 changes: 5 additions & 1 deletion chain/client/src/client_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,11 @@ impl Handler<WithSpanContext<Status>> for ClientActor {
sync_status: format!(
"{} ({})",
self.client.sync_status.as_variant_name().to_string(),
display_sync_status(&self.client.sync_status, &self.client.chain.head()?,),
display_sync_status(
&self.client.sync_status,
&self.client.chain.head()?,
&self.client.config.state_sync.sync,
),
),
catchup_status: self.client.get_catchup_status()?,
current_head_status: head.clone().into(),
Expand Down
32 changes: 19 additions & 13 deletions chain/client/src/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::config_updater::ConfigUpdater;
use crate::{metrics, SyncStatus};
use actix::Addr;
use itertools::Itertools;
use near_chain_configs::{ClientConfig, LogSummaryStyle};
use near_chain_configs::{ClientConfig, LogSummaryStyle, SyncConfig};
use near_network::types::NetworkInfo;
use near_primitives::block::Tip;
use near_primitives::network::PeerId;
Expand Down Expand Up @@ -279,10 +279,9 @@ impl InfoHelper {

let s = |num| if num == 1 { "" } else { "s" };

let sync_status_log = Some(display_sync_status(sync_status, head));

let sync_status_log =
Some(display_sync_status(sync_status, head, &client_config.state_sync.sync));
let catchup_status_log = display_catchup_status(catchup_status);

let validator_info_log = validator_info.as_ref().map(|info| {
format!(
" {}{} validator{}",
Expand Down Expand Up @@ -500,7 +499,11 @@ pub fn display_catchup_status(catchup_status: Vec<CatchupStatusView>) -> String
.join("\n")
}

pub fn display_sync_status(sync_status: &SyncStatus, head: &Tip) -> String {
pub fn display_sync_status(
sync_status: &SyncStatus,
head: &Tip,
state_sync_config: &SyncConfig,
) -> String {
metrics::SYNC_STATUS.set(sync_status.repr() as i64);
match sync_status {
SyncStatus::AwaitingPeers => format!("#{:>8} Waiting for peers", head.height),
Expand Down Expand Up @@ -545,14 +548,17 @@ pub fn display_sync_status(sync_status: &SyncStatus, head: &Tip) -> String {
for (shard_id, shard_status) in shard_statuses {
write!(res, "[{}: {}]", shard_id, shard_status.status.to_string(),).unwrap();
}
// TODO #8719
tracing::warn!(target: "stats",
"The node is syncing its State. The current implementation of this mechanism is known to be unreliable. It may never complete, or fail randomly and corrupt the DB.\n\
Suggestions:\n\
* Download a recent data snapshot and restart the node.\n\
* Disable state sync in the config. Add `\"state_sync_enabled\": false` to `config.json`.\n\
\n\
A better implementation of State Sync is work in progress.");
if matches!(state_sync_config, SyncConfig::Peers) {
// TODO #8719
tracing::warn!(
target: "stats",
"The node is syncing its State. The current implementation of this mechanism is known to be unreliable. It may never complete, or fail randomly and corrupt the DB.\n\
Suggestions:\n\
* Download a recent data snapshot and restart the node.\n\
* Disable state sync in the config. Add `\"state_sync_enabled\": false` to `config.json`.\n\
\n\
A better implementation of State Sync is work in progress.");
}
res
}
SyncStatus::StateSyncDone => "State sync done".to_string(),
Expand Down
Loading