Skip to content

Commit

Permalink
[epoch] Fix a few data races on epoch store (MystenLabs#7451)
Browse files Browse the repository at this point in the history
We should pass epoch store down to the creation of various validator
components.
This PR fixes some of the left-over calls to epoch_store of the state.
There is one more bug that's not yet fixed in this PR, which is the
narwhal committee. Let's fix it in a separate PR.
  • Loading branch information
lxfind authored Jan 17, 2023
1 parent b277ce1 commit b47f31f
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 11 deletions.
1 change: 1 addition & 0 deletions crates/sui-core/src/authority.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1847,6 +1847,7 @@ impl AuthorityState {
.compute_object_reference())
}

// TODO: Audit every call to this function to make sure there are no data races during reconfig.
pub fn get_sui_system_state_object(&self) -> SuiResult<SuiSystemState> {
self.database.get_sui_system_state_object()
}
Expand Down
3 changes: 2 additions & 1 deletion crates/sui-core/src/authority_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ impl AuthorityServer {
));
let consensus_adapter = ConsensusAdapter::new(
consensus_client,
state.clone(),
state.name,
&state.epoch_store_for_testing(),
ConsensusAdapterMetrics::new_test(),
);

Expand Down
8 changes: 4 additions & 4 deletions crates/sui-core/src/consensus_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ use tap::prelude::*;
use tokio::task::JoinHandle;
use tokio::time;

use crate::authority::AuthorityState;
use mysten_metrics::spawn_monitored_task;
use sui_types::base_types::AuthorityName;
use sui_types::messages::ConsensusTransactionKind;
Expand Down Expand Up @@ -153,18 +152,19 @@ impl ConsensusAdapter {
/// Make a new Consensus adapter instance.
pub fn new(
consensus_client: Box<dyn SubmitToConsensus>,
authority: Arc<AuthorityState>,
authority: AuthorityName,
epoch_store: &Arc<AuthorityPerEpochStore>,
opt_metrics: OptArcConsensusAdapterMetrics,
) -> Arc<Self> {
let num_inflight_transactions = Default::default();
let this = Arc::new(Self {
consensus_client,
authority: authority.name,
authority,
num_inflight_transactions,
opt_metrics,
});
let recover = this.clone();
recover.submit_recovered(&authority.epoch_store());
recover.submit_recovered(epoch_store);
this
}

Expand Down
3 changes: 2 additions & 1 deletion crates/sui-core/src/unit_tests/consensus_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ async fn submit_transaction_to_consensus_adapter() {
// Make a new consensus adapter instance.
let adapter = ConsensusAdapter::new(
Box::new(SubmitDirectly(state.clone())),
state.clone(),
state.name,
&state.epoch_store_for_testing(),
metrics,
);

Expand Down
23 changes: 18 additions & 5 deletions crates/sui-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ use sui_core::epoch::epoch_metrics::EpochMetrics;
use sui_core::epoch::reconfiguration::ReconfigurationInitiator;
use sui_core::narwhal_manager::{NarwhalConfiguration, NarwhalManager};
use sui_json_rpc::coin_api::CoinReadApi;
use sui_types::base_types::{EpochId, TransactionDigest};
use sui_types::base_types::{AuthorityName, EpochId, TransactionDigest};
use sui_types::error::{SuiError, SuiResult};

pub struct ValidatorComponents {
Expand Down Expand Up @@ -265,6 +265,7 @@ impl SuiNode {
Self::construct_validator_components(
config,
state.clone(),
epoch_store.clone(),
checkpoint_store.clone(),
state_sync_handle.clone(),
&registry_service,
Expand Down Expand Up @@ -430,6 +431,7 @@ impl SuiNode {
async fn construct_validator_components(
config: &NodeConfig,
state: Arc<AuthorityState>,
epoch_store: Arc<AuthorityPerEpochStore>,
checkpoint_store: Arc<CheckpointStore>,
state_sync_handle: state_sync::Handle,
registry_service: &RegistryService,
Expand All @@ -440,7 +442,8 @@ impl SuiNode {

let consensus_adapter = Self::construct_consensus_adapter(
consensus_config,
state.clone(),
state.name,
&epoch_store,
&registry_service.default_registry(),
);

Expand All @@ -465,7 +468,7 @@ impl SuiNode {
state.clone(),
consensus_adapter,
checkpoint_store,
state.epoch_store().clone(),
epoch_store,
state_sync_handle,
narwhal_manager,
validator_server_handle,
Expand Down Expand Up @@ -503,6 +506,9 @@ impl SuiNode {
state.metrics.clone(),
));

// TODO: The following is a bug and could potentially lead to data races.
// We need to put the narwhal committee in the epoch store, so that we could read it here,
// instead of reading from the system state.
let system_state = state
.get_sui_system_state_object()
.expect("Reading Sui system state object cannot fail");
Expand Down Expand Up @@ -589,7 +595,8 @@ impl SuiNode {

fn construct_consensus_adapter(
consensus_config: &ConsensusConfig,
state: Arc<AuthorityState>,
authority: AuthorityName,
epoch_store: &Arc<AuthorityPerEpochStore>,
prometheus_registry: &Registry,
) -> Arc<ConsensusAdapter> {
let consensus_address = consensus_config.address().to_owned();
Expand All @@ -601,7 +608,12 @@ impl SuiNode {
let ca_metrics = ConsensusAdapterMetrics::new(prometheus_registry);
// The consensus adapter allows the authority to send user certificates through consensus.

ConsensusAdapter::new(Box::new(consensus_client), state, ca_metrics)
ConsensusAdapter::new(
Box::new(consensus_client),
authority,
epoch_store,
ca_metrics,
)
}

async fn start_grpc_validator_service(
Expand Down Expand Up @@ -739,6 +751,7 @@ impl SuiNode {
Self::construct_validator_components(
&self.config,
self.state.clone(),
self.state.epoch_store().clone(),
self.checkpoint_store.clone(),
self.state_sync.clone(),
&self.registry_service,
Expand Down

0 comments on commit b47f31f

Please sign in to comment.