diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb6061cb9d..5302fc60c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,28 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
- `get-tenure-info?` added
- `get-block-info?` removed
+## [2.5.0.0.7]
+
+### Added
+
+- Add warn logs for block validate rejections (#5079)
+- Neon mock miner replay (#5060)
+
+### Changed
+
+- Revert BurnchainHeaderHash serialization change (#5094)
+- boot_to_epoch_3 in SignerTest should wait for a new commit (#5087)
+- Fix block proposal rejection test (#5084)
+- Mock signing revamp (#5070)
+- Multi miner fixes jude (#5040)
+- Remove spurious deadlock condition whenever the sortition DB is opened
+
+## [2.5.0.0.6]
+
+### Changed
+
+- If there is a getchunk/putchunk that fails due to a stale (or future) version NACK, the StackerDB sync state machine should immediately retry sync (#5066)
+
## [2.5.0.0.5]
### Added
diff --git a/clarity/src/vm/database/sqlite.rs b/clarity/src/vm/database/sqlite.rs
index dc3ad4f5bd..7d2af59eb5 100644
--- a/clarity/src/vm/database/sqlite.rs
+++ b/clarity/src/vm/database/sqlite.rs
@@ -21,7 +21,7 @@ use rusqlite::{
};
use stacks_common::types::chainstate::{BlockHeaderHash, StacksBlockId};
use stacks_common::types::sqlite::NO_PARAMS;
-use stacks_common::util::db_common::tx_busy_handler;
+use stacks_common::util::db::tx_busy_handler;
use stacks_common::util::hash::Sha512Trunc256Sum;
use super::clarity_store::{make_contract_hash_key, ContractCommitment};
diff --git a/stacks-common/src/util/db.rs b/stacks-common/src/util/db.rs
new file mode 100644
index 0000000000..89fe4677c7
--- /dev/null
+++ b/stacks-common/src/util/db.rs
@@ -0,0 +1,94 @@
+// Copyright (C) 2013-2020 Blockstack PBC, a public benefit corporation
+// Copyright (C) 2020 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use std::backtrace::Backtrace;
+use std::sync::{LazyLock, Mutex};
+use std::thread;
+use std::time::{Duration, Instant};
+
+use hashbrown::HashMap;
+use rand::{thread_rng, Rng};
+use rusqlite::Connection;
+
+use crate::util::sleep_ms;
+
+/// Keep track of DB locks, for deadlock debugging
+/// - **key:** `rusqlite::Connection` debug print
+/// - **value:** Lock holder (thread name + timestamp)
+///
+/// This uses a `Mutex` inside of `LazyLock` because:
+/// - Using `Mutex` alone, it can't be statically initialized because `HashMap::new()` isn't `const`
+/// - Using `LazyLock` alone doesn't allow interior mutability
+static LOCK_TABLE: LazyLock<Mutex<HashMap<String, String>>> =
+    LazyLock::new(|| Mutex::new(HashMap::new()));
+/// Generate timestamps for use in `LOCK_TABLE`
+/// `Instant` is preferable to `SystemTime` because it uses `CLOCK_MONOTONIC` and is not affected by NTP adjustments
+static LOCK_TABLE_TIMER: LazyLock<Instant> = LazyLock::new(Instant::now);
+
+/// Call when using an operation which locks a database
+/// Updates `LOCK_TABLE`
+pub fn update_lock_table(conn: &Connection) {
+ let timestamp = LOCK_TABLE_TIMER.elapsed().as_millis();
+ // The debug format for `Connection` includes the path
+ let k = format!("{conn:?}");
+ let v = format!("{:?}@{timestamp}", thread::current().name());
+ LOCK_TABLE.lock().unwrap().insert(k, v);
+}
+
+/// Called by `rusqlite` if we are waiting too long on a database lock
+/// If called too many times, will assume a deadlock and panic
+pub fn tx_busy_handler(run_count: i32) -> bool {
+ const TIMEOUT: Duration = Duration::from_secs(300);
+ const AVG_SLEEP_TIME_MS: u64 = 100;
+
+ // First, check if this is taking unreasonably long. If so, it's probably a deadlock
+ let run_count = run_count.unsigned_abs();
+ let approx_time_elapsed =
+ Duration::from_millis(AVG_SLEEP_TIME_MS.saturating_mul(u64::from(run_count)));
+ if approx_time_elapsed > TIMEOUT {
+ error!("Deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs();
+ "run_count" => run_count,
+ "backtrace" => ?Backtrace::capture()
+ );
+ for (k, v) in LOCK_TABLE.lock().unwrap().iter() {
+ error!("Database '{k}' last locked by {v}");
+ }
+ panic!("Deadlock in thread {:?}", thread::current().name());
+ }
+
+ let mut sleep_time_ms = 2u64.saturating_pow(run_count);
+
+ sleep_time_ms = sleep_time_ms.saturating_add(thread_rng().gen_range(0..sleep_time_ms));
+
+ if sleep_time_ms > AVG_SLEEP_TIME_MS {
+ let jitter = 10;
+ sleep_time_ms =
+ thread_rng().gen_range((AVG_SLEEP_TIME_MS - jitter)..(AVG_SLEEP_TIME_MS + jitter));
+ }
+
+ let msg = format!("Database is locked; sleeping {sleep_time_ms}ms and trying again");
+ if run_count > 10 && run_count % 10 == 0 {
+ warn!("{msg}";
+ "run_count" => run_count,
+ "backtrace" => ?Backtrace::capture()
+ );
+ } else {
+ debug!("{msg}");
+ }
+
+ sleep_ms(sleep_time_ms);
+ true
+}
diff --git a/stacks-common/src/util/mod.rs b/stacks-common/src/util/mod.rs
index 13ab79dcb3..a9dfc47806 100644
--- a/stacks-common/src/util/mod.rs
+++ b/stacks-common/src/util/mod.rs
@@ -19,6 +19,7 @@ pub mod log;
#[macro_use]
pub mod macros;
pub mod chunked_encoding;
+pub mod db;
pub mod hash;
pub mod pair;
pub mod pipe;
@@ -85,32 +86,6 @@ impl error::Error for HexError {
}
}
-pub mod db_common {
- use std::{thread, time};
-
- use rand::{thread_rng, Rng};
-
- pub fn tx_busy_handler(run_count: i32) -> bool {
- let mut sleep_count = 10;
- if run_count > 0 {
- sleep_count = 2u64.saturating_pow(run_count as u32);
- }
- sleep_count = sleep_count.saturating_add(thread_rng().gen::<u64>() % sleep_count);
-
- if sleep_count > 5000 {
- sleep_count = 5000;
- }
-
- debug!(
- "Database is locked; sleeping {}ms and trying again",
- &sleep_count
- );
-
- thread::sleep(time::Duration::from_millis(sleep_count));
- true
- }
-}
-
/// Write any `serde_json` object directly to a file
pub fn serialize_json_to_file<J, P>(json: &J, path: P) -> Result<(), std::io::Error>
where
diff --git a/stackslib/src/util_lib/db.rs b/stackslib/src/util_lib/db.rs
index f54a9c97ec..53f597daa2 100644
--- a/stackslib/src/util_lib/db.rs
+++ b/stackslib/src/util_lib/db.rs
@@ -18,7 +18,7 @@ use std::backtrace::Backtrace;
use std::io::Error as IOError;
use std::ops::{Deref, DerefMut};
use std::path::{Path, PathBuf};
-use std::time::Duration;
+use std::time::{Duration, SystemTime};
use std::{error, fmt, fs, io};
use clarity::vm::types::QualifiedContractIdentifier;
@@ -32,6 +32,7 @@ use serde_json::Error as serde_error;
use stacks_common::types::chainstate::{SortitionId, StacksAddress, StacksBlockId, TrieHash};
use stacks_common::types::sqlite::NO_PARAMS;
use stacks_common::types::Address;
+use stacks_common::util::db::update_lock_table;
use stacks_common::util::hash::to_hex;
use stacks_common::util::secp256k1::{Secp256k1PrivateKey, Secp256k1PublicKey};
use stacks_common::util::sleep_ms;
@@ -655,32 +656,9 @@ impl<'a, C: Clone, T: MarfTrieId> DerefMut for IndexDBTx<'a, C, T> {
}
}
+/// Called by `rusqlite` if we are waiting too long on a database lock
pub fn tx_busy_handler(run_count: i32) -> bool {
- let mut sleep_count = 2;
- if run_count > 0 {
- sleep_count = 2u64.saturating_pow(run_count as u32);
- }
- sleep_count = sleep_count.saturating_add(thread_rng().gen::<u64>() % sleep_count);
-
- if sleep_count > 100 {
- let jitter = thread_rng().gen::<u64>() % 20;
- sleep_count = 100 - jitter;
- }
-
- debug!(
- "Database is locked; sleeping {}ms and trying again",
- &sleep_count;
- "backtrace" => ?{
- if run_count > 10 && run_count % 10 == 0 {
- Some(Backtrace::capture())
- } else {
- None
- }
- },
- );
-
- sleep_ms(sleep_count);
- true
+ stacks_common::util::db::tx_busy_handler(run_count)
}
/// Begin an immediate-mode transaction, and handle busy errors with exponential backoff.
@@ -697,6 +675,7 @@ pub fn tx_begin_immediate<'a>(conn: &'a mut Connection) -> Result<DBTx<'a>, Error> {
pub fn tx_begin_immediate_sqlite<'a>(conn: &'a mut Connection) -> Result<Transaction<'a>, sqlite_error> {
conn.busy_handler(Some(tx_busy_handler))?;
let tx = Transaction::new(conn, TransactionBehavior::Immediate)?;
+ update_lock_table(tx.deref());
Ok(tx)
}
diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs
index e57e7cc177..da1c75c708 100644
--- a/testnet/stacks-node/src/nakamoto_node/miner.rs
+++ b/testnet/stacks-node/src/nakamoto_node/miner.rs
@@ -366,22 +366,32 @@ impl BlockMinerThread {
&mut attempts,
) {
Ok(x) => x,
- Err(e) => {
- match e {
- NakamotoNodeError::StacksTipChanged => {
- info!("Stacks tip changed while waiting for signatures");
- return Err(e);
- }
- NakamotoNodeError::BurnchainTipChanged => {
- info!("Burnchain tip changed while waiting for signatures");
- return Err(e);
- }
- _ => {
- error!("Error while gathering signatures: {e:?}. Will try mining again.");
- continue;
- }
+ Err(e) => match e {
+ NakamotoNodeError::StacksTipChanged => {
+ info!("Stacks tip changed while waiting for signatures";
+ "signer_sighash" => %new_block.header.signer_signature_hash(),
+ "block_height" => new_block.header.chain_length,
+ "consensus_hash" => %new_block.header.consensus_hash,
+ );
+ return Err(e);
}
- }
+ NakamotoNodeError::BurnchainTipChanged => {
+ info!("Burnchain tip changed while waiting for signatures";
+ "signer_sighash" => %new_block.header.signer_signature_hash(),
+ "block_height" => new_block.header.chain_length,
+ "consensus_hash" => %new_block.header.consensus_hash,
+ );
+ return Err(e);
+ }
+ _ => {
+ error!("Error while gathering signatures: {e:?}. Will try mining again.";
+ "signer_sighash" => %new_block.header.signer_signature_hash(),
+ "block_height" => new_block.header.chain_length,
+ "consensus_hash" => %new_block.header.consensus_hash,
+ );
+ continue;
+ }
+ },
};
new_block.header.signer_signature = signer_signature;