Skip to content

Commit db64706

Browse files
testing quorum
1 parent aac983d commit db64706

File tree

3 files changed

+102
-3
lines changed

3 files changed

+102
-3
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

common/credential-proxy/src/quorum_checker.rs

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ pub struct QuorumStateChecker {
2727
cancellation_token: CancellationToken,
2828
check_interval: Duration,
2929
quorum_state: QuorumState,
30+
max_retries: u32,
31+
retry_initial_delay: Duration,
3032
}
3133

3234
impl QuorumStateChecker {
@@ -42,6 +44,8 @@ impl QuorumStateChecker {
4244
quorum_state: QuorumState {
4345
available: Arc::new(Default::default()),
4446
},
47+
max_retries: 3,
48+
retry_initial_delay: Duration::from_secs(2),
4549
};
4650

4751
// first check MUST succeed, otherwise we shouldn't start
@@ -56,7 +60,102 @@ impl QuorumStateChecker {
5660
self.quorum_state.clone()
5761
}
5862

63+
fn is_retryable_error(&self, err: &CredentialProxyError) -> bool {
64+
let err_str = err.to_string().to_lowercase();
65+
66+
// Check for DNS-related errors
67+
if err_str.contains("dns")
68+
|| err_str.contains("lookup")
69+
|| err_str.contains("name resolution")
70+
|| err_str.contains("temporary failure")
71+
|| err_str.contains("failed to lookup address")
72+
{
73+
return true;
74+
}
75+
76+
// Check if it's a Tendermint RPC error (which could be DNS/timeout related)
77+
if let CredentialProxyError::NyxdFailure { source: nyxd_err } = err {
78+
let nyxd_err_str = nyxd_err.to_string().to_lowercase();
79+
if nyxd_err_str.contains("tendermint rpc request failed") {
80+
return true;
81+
}
82+
83+
if nyxd_err.is_tendermint_response_timeout() {
84+
return true;
85+
}
86+
}
87+
88+
false
89+
}
90+
5991
async fn check_quorum_state(&self) -> Result<bool, CredentialProxyError> {
92+
self.check_quorum_state_with_retry().await
93+
}
94+
95+
async fn check_quorum_state_with_retry(&self) -> Result<bool, CredentialProxyError> {
96+
let mut last_error_msg = None;
97+
let delay = self.retry_initial_delay;
98+
99+
for attempt in 0..=self.max_retries {
100+
match self.check_quorum_state_once().await {
101+
Ok(result) => {
102+
if attempt > 0 {
103+
info!("quorum check succeeded after {} retry attempt(s)", attempt);
104+
}
105+
return Ok(result);
106+
}
107+
Err(err) => {
108+
let err_msg = err.to_string();
109+
110+
// Check if this error is retryable
111+
if !self.is_retryable_error(&err) {
112+
return Err(err);
113+
}
114+
115+
last_error_msg = Some(err_msg.clone());
116+
117+
if attempt >= self.max_retries {
118+
break;
119+
}
120+
121+
// Log the retry attempt
122+
warn!(
123+
"quorum check failed (attempt {}/{}): {}. Retrying in {:?}...",
124+
attempt + 1,
125+
self.max_retries + 1,
126+
err_msg,
127+
delay
128+
);
129+
130+
// Wait before retrying with exponential backoff
131+
tokio::time::sleep(delay).await;
132+
}
133+
}
134+
}
135+
136+
// try one final time to get the actual error
137+
match self.check_quorum_state_once().await {
138+
Ok(result) => {
139+
warn!(
140+
"quorum check succeeded on final attempt after {} retries",
141+
self.max_retries
142+
);
143+
Ok(result)
144+
}
145+
Err(err) => {
146+
if let Some(error_msg) = last_error_msg {
147+
error!(
148+
"quorum check failed after {} retry attempts. Last error: {}",
149+
self.max_retries + 1,
150+
error_msg
151+
);
152+
}
153+
Err(err)
154+
}
155+
}
156+
}
157+
158+
async fn check_quorum_state_once(&self) -> Result<bool, CredentialProxyError> {
60159
let client_guard = self.client.query_chain().await;
61160

62161
// split the operation as we only need to hold the reference to chain client for the first part
@@ -93,7 +192,7 @@ impl QuorumStateChecker {
93192
break
94193
}
95194
_ = tokio::time::sleep(self.check_interval) => {
96-
match self.check_quorum_state().await {
195+
match self.check_quorum_state_with_retry().await {
97196
Ok(available) => self.quorum_state.available.store(available, Ordering::SeqCst),
98197
Err(err) => error!("failed to check current quorum state: {err}"),
99198
}

nym-credential-proxy/nym-credential-proxy/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "nym-credential-proxy"
3-
version = "0.3.0"
3+
version = "0.3.0-test"
44
authors.workspace = true
55
repository.workspace = true
66
homepage.workspace = true

0 commit comments

Comments
 (0)