From 3bd185f82481b070e3f8431aef38c92221a081f3 Mon Sep 17 00:00:00 2001
From: wileyj <2847772+wileyj@users.noreply.github.com>
Date: Mon, 12 Aug 2024 17:32:11 -0700
Subject: [PATCH 01/13] Adding changes for 2.5.0.0.6

---
 CHANGELOG.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb6061cb9d..2cca1d273a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,27 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
     - `get-tenure-info?` added
     - `get-block-info?` removed
 
+## [2.5.0.0.7]
+
+### Added
+
+feat: Neon mock miner replay (#5060)
+chore: add warn logs for block validate rejections (#5079)
+
+### Changed
+
+- bugfix/boot_to_epoch_3 in SignerTest should wait for a new commit (#5087)
+- Fix: Revert BurnchainHeaderHash serialization change (#5094)
+- Feat/mock signing revamp (#5070)
+- Fix block proposal rejection test (#5084)
+- Fix/multi miner fixes jude (#5040)
+
+## [2.5.0.0.6]
+
+### Changed
+
+- If there is a getchunk/putchunk that fails due to a stale (or future) version NACK, the StackerDB sync state machine should immediately retry sync (#5066)
+
 ## [2.5.0.0.5]
 
 ### Added

From 88c95c6817f3412d5dc7b41c3d0dc8a9b3070ed3 Mon Sep 17 00:00:00 2001
From: wileyj <2847772+wileyj@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:51:47 -0700
Subject: [PATCH 02/13] Update changelog for release

---
 CHANGELOG.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2cca1d273a..0f9a419f1d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,16 +21,16 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
 
 ### Added
 
-feat: Neon mock miner replay (#5060)
-chore: add warn logs for block validate rejections (#5079)
+- Neon mock miner replay (#5060)
+- Add warn logs for block validate rejections (#5079)
 
 ### Changed
 
-- bugfix/boot_to_epoch_3 in SignerTest should wait for a new commit (#5087)
-- Fix: Revert BurnchainHeaderHash serialization change (#5094)
-- Feat/mock signing revamp (#5070)
+- boot_to_epoch_3 in SignerTest should wait for a new commit (#5087)
+- Revert BurnchainHeaderHash serialization change (#5094)
+- Mock signing revamp (#5070)
 - Fix block proposal rejection test (#5084)
-- Fix/multi miner fixes jude (#5040)
+- Multi miner fixes jude (#5040)
 
 ## [2.5.0.0.6]
 

From 1584e9c422782e6e5e3f342f2e334f0e3d9e41b1 Mon Sep 17 00:00:00 2001
From: wileyj <2847772+wileyj@users.noreply.github.com>
Date: Wed, 28 Aug 2024 10:52:51 -0700
Subject: [PATCH 03/13] Update changelog for release

---
 CHANGELOG.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0f9a419f1d..2add5b99f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,15 +21,15 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
 
 ### Added
 
-- Neon mock miner replay (#5060)
 - Add warn logs for block validate rejections (#5079)
+- Neon mock miner replay (#5060)
 
 ### Changed
 
-- boot_to_epoch_3 in SignerTest should wait for a new commit (#5087)
 - Revert BurnchainHeaderHash serialization change (#5094)
-- Mock signing revamp (#5070)
+- boot_to_epoch_3 in SignerTest should wait for a new commit (#5087)
 - Fix block proposal rejection test (#5084)
+- Mock signing revamp (#5070)
 - Multi miner fixes jude (#5040)
 
 ## [2.5.0.0.6]

From 9eef4066cce42414a0d0690de4da23027b8292ba Mon Sep 17 00:00:00 2001
From: Jude Nelson <judecn@gmail.com>
Date: Wed, 28 Aug 2024 14:49:59 -0400
Subject: [PATCH 04/13] chore: remove spurious deadlock condition arising from
 needlessly opening a transaction whenever we open the sortition DB

---
 stackslib/src/chainstate/burn/db/sortdb.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/stackslib/src/chainstate/burn/db/sortdb.rs b/stackslib/src/chainstate/burn/db/sortdb.rs
index 15a3bf5641..b538ae17fa 100644
--- a/stackslib/src/chainstate/burn/db/sortdb.rs
+++ b/stackslib/src/chainstate/burn/db/sortdb.rs
@@ -3456,6 +3456,14 @@ impl SortitionDB {
                         SortitionDB::apply_schema_9(&tx.deref(), epochs)?;
                         tx.commit()?;
                     } else if version == expected_version {
+                        // this transaction is almost never needed
+                        let validated_epochs = StacksEpoch::validate_epochs(epochs);
+                        let existing_epochs = Self::get_stacks_epochs(self.conn())?;
+                        if existing_epochs == validated_epochs {
+                            return Ok(());
+                        }
+
+                        // epochs are out of date
                         let tx = self.tx_begin()?;
                         SortitionDB::validate_and_replace_epochs(&tx, epochs)?;
                         tx.commit()?;

From afdaaf6b04d72b1d3c58f42bf7644b975ef1ded1 Mon Sep 17 00:00:00 2001
From: wileyj <2847772+wileyj@users.noreply.github.com>
Date: Wed, 4 Sep 2024 10:47:07 -0700
Subject: [PATCH 05/13] Add deadlock fix addtion to changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2add5b99f6..5302fc60c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
 - Fix block proposal rejection test (#5084)
 - Mock signing revamp (#5070)
 - Multi miner fixes jude (#5040)
+- Remove spurious deadlock condition whenever the sortition DB is opened
 
 ## [2.5.0.0.6]
 

From 47c562a9d586a325a28629b5b9ede0203d0be8c5 Mon Sep 17 00:00:00 2001
From: Jude Nelson <judecn@gmail.com>
Date: Thu, 5 Sep 2024 22:20:25 -0400
Subject: [PATCH 06/13] fix: make a stackerdb shrink if its signer list becomes
 smaller than that in the DB

---
 stackslib/src/net/stackerdb/db.rs       | 11 +++
 stackslib/src/net/stackerdb/mod.rs      |  8 ++-
 stackslib/src/net/stackerdb/tests/db.rs | 96 +++++++++++++++++++++++++
 3 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/stackslib/src/net/stackerdb/db.rs b/stackslib/src/net/stackerdb/db.rs
index 1dab3f4052..2b735668ac 100644
--- a/stackslib/src/net/stackerdb/db.rs
+++ b/stackslib/src/net/stackerdb/db.rs
@@ -293,6 +293,15 @@ impl<'a> StackerDBTx<'a> {
         Ok(())
     }
 
+    /// Shrink a StackerDB.  Remove all slots at and beyond a particular slot ID.
+    fn shrink_stackerdb(&self, stackerdb_id: i64, first_slot_id: u32) -> Result<(), net_error> {
+        let qry = "DELETE FROM chunks WHERE stackerdb_id = ?1 AND slot_id >= ?2";
+        let args = params![&stackerdb_id, &first_slot_id];
+        let mut stmt = self.sql_tx.prepare(&qry)?;
+        stmt.execute(args)?;
+        Ok(())
+    }
+
     /// Update a database's storage slots, e.g. from new configuration state in its smart contract.
     /// Chunk data for slots that no longer exist will be dropped.
     /// Newly-created slots will be instantiated with empty data.
@@ -343,6 +352,8 @@ impl<'a> StackerDBTx<'a> {
                 stmt.execute(args)?;
             }
         }
+        debug!("Shrink {} to {} slots", smart_contract, total_slots_read);
+        self.shrink_stackerdb(stackerdb_id, total_slots_read)?;
         Ok(())
     }
 
diff --git a/stackslib/src/net/stackerdb/mod.rs b/stackslib/src/net/stackerdb/mod.rs
index d310998a19..ea01b1b22a 100644
--- a/stackslib/src/net/stackerdb/mod.rs
+++ b/stackslib/src/net/stackerdb/mod.rs
@@ -341,8 +341,14 @@ impl StackerDBs {
                         &e
                     );
                 }
-            } else if new_config != stackerdb_config && new_config.signers.len() > 0 {
+            } else if (new_config != stackerdb_config && new_config.signers.len() > 0)
+                || (new_config == stackerdb_config
+                    && new_config.signers.len()
+                        != self.get_slot_versions(&stackerdb_contract_id)?.len())
+            {
                 // only reconfigure if the config has changed
+                // (that second check on the length is needed in case the node is a victim of
+                // #5142, which was a bug whereby a stackerdb could never shrink)
                 if let Err(e) = self.reconfigure_stackerdb(&stackerdb_contract_id, &new_config) {
                     warn!(
                         "Failed to create or reconfigure StackerDB {stackerdb_contract_id}: DB error {:?}",
diff --git a/stackslib/src/net/stackerdb/tests/db.rs b/stackslib/src/net/stackerdb/tests/db.rs
index 7371b6b9c5..9bcf800529 100644
--- a/stackslib/src/net/stackerdb/tests/db.rs
+++ b/stackslib/src/net/stackerdb/tests/db.rs
@@ -20,6 +20,7 @@ use std::path::Path;
 use clarity::vm::types::QualifiedContractIdentifier;
 use clarity::vm::ContractName;
 use libstackerdb::SlotMetadata;
+use rusqlite::params;
 use stacks_common::address::{
     AddressHashMode, C32_ADDRESS_VERSION_MAINNET_MULTISIG, C32_ADDRESS_VERSION_MAINNET_SINGLESIG,
 };
@@ -649,6 +650,16 @@ fn test_reconfigure_stackerdb() {
         initial_metadata.push((slot_metadata, chunk_data));
     }
 
+    tx.commit().unwrap();
+
+    let db_slot_metadata = db.get_db_slot_metadata(&sc).unwrap();
+    assert_eq!(db_slot_metadata.len(), pks.len());
+    for (i, slot_md) in db_slot_metadata.iter().enumerate() {
+        let slot_metadata = db.get_slot_metadata(&sc, i as u32).unwrap().unwrap();
+        assert_eq!(slot_metadata, *slot_md);
+    }
+
+    let tx = db.tx_begin(StackerDBConfig::noop()).unwrap();
     let new_pks: Vec<_> = (0..10).map(|_| StacksPrivateKey::new()).collect();
     let reconfigured_pks = vec![
         // first five slots are unchanged
@@ -722,6 +733,91 @@ fn test_reconfigure_stackerdb() {
             assert_eq!(chunk.len(), 0);
         }
     }
+
+    let db_slot_metadata = db.get_db_slot_metadata(&sc).unwrap();
+    assert_eq!(db_slot_metadata.len(), reconfigured_pks.len());
+    for (i, slot_md) in db_slot_metadata.iter().enumerate() {
+        let slot_metadata = db.get_slot_metadata(&sc, i as u32).unwrap().unwrap();
+        assert_eq!(slot_metadata, *slot_md);
+    }
+
+    // reconfigure with fewer slots
+    let new_pks: Vec<_> = (0..10).map(|_| StacksPrivateKey::new()).collect();
+    let reconfigured_pks = vec![
+        // first five slots are unchanged
+        pks[0], pks[1], pks[2], pks[3], pks[4],
+        // next five slots are different, so their contents will be dropped and versions and write
+        // timestamps reset
+        new_pks[0], new_pks[1], new_pks[2], new_pks[3],
+        new_pks[4],
+        // slots 10-15 will disappear
+    ];
+    let reconfigured_addrs: Vec<_> = reconfigured_pks
+        .iter()
+        .map(|pk| {
+            StacksAddress::from_public_keys(
+                C32_ADDRESS_VERSION_MAINNET_SINGLESIG,
+                &AddressHashMode::SerializeP2PKH,
+                1,
+                &vec![StacksPublicKey::from_private(&pk)],
+            )
+            .unwrap()
+        })
+        .collect();
+
+    let tx = db.tx_begin(StackerDBConfig::noop()).unwrap();
+
+    // reconfigure
+    tx.reconfigure_stackerdb(
+        &sc,
+        &reconfigured_addrs
+            .clone()
+            .into_iter()
+            .map(|addr| (addr, 1))
+            .collect::<Vec<_>>(),
+    )
+    .unwrap();
+
+    tx.commit().unwrap();
+
+    for (i, pk) in new_pks.iter().enumerate() {
+        if i < 5 {
+            // first five are unchanged
+            let chunk_data = StackerDBChunkData {
+                slot_id: i as u32,
+                slot_version: 1,
+                sig: MessageSignature::empty(),
+                data: vec![i as u8; 128],
+            };
+
+            let slot_metadata = db.get_slot_metadata(&sc, i as u32).unwrap().unwrap();
+            let chunk = db.get_latest_chunk(&sc, i as u32).unwrap().unwrap();
+
+            assert_eq!(initial_metadata[i].0, slot_metadata);
+            assert_eq!(initial_metadata[i].1.data, chunk);
+        } else if i < 10 {
+            // next five are wiped
+            let slot_metadata = db.get_slot_metadata(&sc, i as u32).unwrap().unwrap();
+            assert_eq!(slot_metadata.slot_id, i as u32);
+            assert_eq!(slot_metadata.slot_version, 0);
+            assert_eq!(slot_metadata.data_hash, Sha512Trunc256Sum([0x00; 32]));
+            assert_eq!(slot_metadata.signature, MessageSignature::empty());
+
+            let chunk = db.get_latest_chunk(&sc, i as u32).unwrap().unwrap();
+            assert_eq!(chunk.len(), 0);
+        } else {
+            // final five are gone
+            let slot_metadata_opt = db.get_slot_metadata(&sc, i as u32).unwrap();
+            assert!(slot_metadata_opt.is_none());
+        }
+    }
+
+    let db_slot_metadata = db.get_db_slot_metadata(&sc).unwrap();
+    assert_eq!(db_slot_metadata.len(), reconfigured_pks.len());
+    for (i, slot_md) in db_slot_metadata.iter().enumerate() {
+        let slot_metadata = db.get_slot_metadata(&sc, i as u32).unwrap().unwrap();
+        assert_eq!(slot_metadata, *slot_md);
+    }
 }
 
 // TODO: max chunk size

From 47436e7ffed5dadff50739cd5ffbdcd962f2cb84 Mon Sep 17 00:00:00 2001
From: Jeff Bencin <jeff.bencin@gmail.com>
Date: Tue, 3 Sep 2024 17:17:35 -0400
Subject: [PATCH 07/13] fix: Improve logging in rusqlite busy handler, and fail
 eventually if probable deadlock detected

---
 stackslib/src/util_lib/db.rs | 54 ++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/stackslib/src/util_lib/db.rs b/stackslib/src/util_lib/db.rs
index f54a9c97ec..be09de2556 100644
--- a/stackslib/src/util_lib/db.rs
+++ b/stackslib/src/util_lib/db.rs
@@ -655,31 +655,44 @@ impl<'a, C: Clone, T: MarfTrieId> DerefMut for IndexDBTx<'a, C, T> {
     }
 }
 
+/// Called by `rusqlite` if we are waiting too long on a database lock
 pub fn tx_busy_handler(run_count: i32) -> bool {
-    let mut sleep_count = 2;
-    if run_count > 0 {
-        sleep_count = 2u64.saturating_pow(run_count as u32);
+    const TIMEOUT: Duration = Duration::from_secs(60);
+    const AVG_SLEEP_TIME_MS: u64 = 100;
+
+    // First, check if this is taking unreasonably long. If so, it's probably a deadlock
+    let run_count = run_count.unsigned_abs();
+    let approx_time_elapsed =
+        Duration::from_millis(AVG_SLEEP_TIME_MS.saturating_mul(u64::from(run_count)));
+    if approx_time_elapsed > TIMEOUT {
+        error!("Probable deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs();
+            "run_count" => run_count,
+            "backtrace" => ?Backtrace::capture()
+        );
+        return false;
     }
-    sleep_count = sleep_count.saturating_add(thread_rng().gen::<u64>() % sleep_count);
 
-    if sleep_count > 100 {
-        let jitter = thread_rng().gen::<u64>() % 20;
-        sleep_count = 100 - jitter;
+    let mut sleep_time_ms = 2u64.saturating_pow(run_count);
+
+    sleep_time_ms = sleep_time_ms.saturating_add(thread_rng().gen_range(0..sleep_time_ms));
+
+    if sleep_time_ms > AVG_SLEEP_TIME_MS {
+        let bound = 10;
+        let jitter = thread_rng().gen_range(0..bound * 2);
+        sleep_time_ms = (AVG_SLEEP_TIME_MS - bound) + jitter;
     }
 
-    debug!(
-        "Database is locked; sleeping {}ms and trying again",
-        &sleep_count;
-        "backtrace" => ?{
-            if run_count > 10 && run_count % 10 == 0 {
-                Some(Backtrace::capture())
-            } else {
-                None
-            }
-        },
-    );
+    let msg = format!("Database is locked; sleeping {sleep_time_ms}ms and trying again");
+    if run_count > 10 && run_count % 10 == 0 {
+        warn!("{msg}";
+            "run_count" => run_count,
+            "backtrace" => ?Backtrace::capture()
+        );
+    } else {
+        debug!("{msg}");
+    }
 
-    sleep_ms(sleep_count);
+    sleep_ms(sleep_time_ms);
     true
 }
 
@@ -696,8 +709,7 @@ pub fn tx_begin_immediate<'a>(conn: &'a mut Connection) -> Result<DBTx<'a>, Erro
 /// Sames as `tx_begin_immediate` except that it returns a rusqlite error.
 pub fn tx_begin_immediate_sqlite<'a>(conn: &'a mut Connection) -> Result<DBTx<'a>, sqlite_error> {
     conn.busy_handler(Some(tx_busy_handler))?;
-    let tx = Transaction::new(conn, TransactionBehavior::Immediate)?;
-    Ok(tx)
+    Transaction::new(conn, TransactionBehavior::Immediate)
 }
 
 #[cfg(feature = "profile-sqlite")]

From 3b24bd34c15f00ae0549a69717d5e2624ea816e0 Mon Sep 17 00:00:00 2001
From: Jeff Bencin <jeff.bencin@gmail.com>
Date: Wed, 4 Sep 2024 13:24:07 -0400
Subject: [PATCH 08/13] chore: Address PR comment from Brice

---
 stackslib/src/util_lib/db.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/stackslib/src/util_lib/db.rs b/stackslib/src/util_lib/db.rs
index be09de2556..4728b83e73 100644
--- a/stackslib/src/util_lib/db.rs
+++ b/stackslib/src/util_lib/db.rs
@@ -677,9 +677,9 @@ pub fn tx_busy_handler(run_count: i32) -> bool {
     sleep_time_ms = sleep_time_ms.saturating_add(thread_rng().gen_range(0..sleep_time_ms));
 
     if sleep_time_ms > AVG_SLEEP_TIME_MS {
-        let bound = 10;
-        let jitter = thread_rng().gen_range(0..bound * 2);
-        sleep_time_ms = (AVG_SLEEP_TIME_MS - bound) + jitter;
+        let jitter = 10;
+        sleep_time_ms =
+            thread_rng().gen_range((AVG_SLEEP_TIME_MS - jitter)..(AVG_SLEEP_TIME_MS + jitter));
     }
 
     let msg = format!("Database is locked; sleeping {sleep_time_ms}ms and trying again");

From 8dcfadf8c205c720a3b16086c195c9c2617bcf4e Mon Sep 17 00:00:00 2001
From: Jeff Bencin <jeff.bencin@gmail.com>
Date: Thu, 5 Sep 2024 08:56:27 -0400
Subject: [PATCH 09/13] chore: Update the `tx_busy_handler()` in stacks-common
 also

---
 clarity/src/vm/database/sqlite.rs |  2 +-
 stacks-common/src/util/db.rs      | 64 +++++++++++++++++++++++++++++++
 stacks-common/src/util/mod.rs     | 27 +------------
 stackslib/src/util_lib/db.rs      | 38 +-----------------
 4 files changed, 67 insertions(+), 64 deletions(-)
 create mode 100644 stacks-common/src/util/db.rs

diff --git a/clarity/src/vm/database/sqlite.rs b/clarity/src/vm/database/sqlite.rs
index dc3ad4f5bd..7d2af59eb5 100644
--- a/clarity/src/vm/database/sqlite.rs
+++ b/clarity/src/vm/database/sqlite.rs
@@ -21,7 +21,7 @@ use rusqlite::{
 };
 use stacks_common::types::chainstate::{BlockHeaderHash, StacksBlockId};
 use stacks_common::types::sqlite::NO_PARAMS;
-use stacks_common::util::db_common::tx_busy_handler;
+use stacks_common::util::db::tx_busy_handler;
 use stacks_common::util::hash::Sha512Trunc256Sum;
 
 use super::clarity_store::{make_contract_hash_key, ContractCommitment};
diff --git a/stacks-common/src/util/db.rs b/stacks-common/src/util/db.rs
new file mode 100644
index 0000000000..29ebdd0bb0
--- /dev/null
+++ b/stacks-common/src/util/db.rs
@@ -0,0 +1,64 @@
+// Copyright (C) 2013-2020 Blockstack PBC, a public benefit corporation
+// Copyright (C) 2020 Stacks Open Internet Foundation
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+use std::backtrace::Backtrace;
+use std::time::Duration;
+
+use rand::{thread_rng, Rng};
+
+use crate::util::sleep_ms;
+
+/// Called by `rusqlite` if we are waiting too long on a database lock
+/// If called too many times, will fail to avoid deadlocks
+pub fn tx_busy_handler(run_count: i32) -> bool {
+    const TIMEOUT: Duration = Duration::from_secs(60);
+    const AVG_SLEEP_TIME_MS: u64 = 100;
+
+    // First, check if this is taking unreasonably long. If so, it's probably a deadlock
+    let run_count = run_count.unsigned_abs();
+    let approx_time_elapsed =
+        Duration::from_millis(AVG_SLEEP_TIME_MS.saturating_mul(u64::from(run_count)));
+    if approx_time_elapsed > TIMEOUT {
+        error!("Probable deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs();
+            "run_count" => run_count,
+            "backtrace" => ?Backtrace::capture()
+        );
+        return false;
+    }
+
+    let mut sleep_time_ms = 2u64.saturating_pow(run_count);
+
+    sleep_time_ms = sleep_time_ms.saturating_add(thread_rng().gen_range(0..sleep_time_ms));
+
+    if sleep_time_ms > AVG_SLEEP_TIME_MS {
+        let jitter = 10;
+        sleep_time_ms =
+            thread_rng().gen_range((AVG_SLEEP_TIME_MS - jitter)..(AVG_SLEEP_TIME_MS + jitter));
+    }
+
+    let msg = format!("Database is locked; sleeping {sleep_time_ms}ms and trying again");
+    if run_count > 10 && run_count % 10 == 0 {
+        warn!("{msg}";
+            "run_count" => run_count,
+            "backtrace" => ?Backtrace::capture()
+        );
+    } else {
+        debug!("{msg}");
+    }
+
+    sleep_ms(sleep_time_ms);
+    true
+}
diff --git a/stacks-common/src/util/mod.rs b/stacks-common/src/util/mod.rs
index 13ab79dcb3..a9dfc47806 100644
--- a/stacks-common/src/util/mod.rs
+++ b/stacks-common/src/util/mod.rs
@@ -19,6 +19,7 @@ pub mod log;
 #[macro_use]
 pub mod macros;
 pub mod chunked_encoding;
+pub mod db;
 pub mod hash;
 pub mod pair;
 pub mod pipe;
@@ -85,32 +86,6 @@ impl error::Error for HexError {
     }
 }
 
-pub mod db_common {
-    use std::{thread, time};
-
-    use rand::{thread_rng, Rng};
-
-    pub fn tx_busy_handler(run_count: i32) -> bool {
-        let mut sleep_count = 10;
-        if run_count > 0 {
-            sleep_count = 2u64.saturating_pow(run_count as u32);
-        }
-        sleep_count = sleep_count.saturating_add(thread_rng().gen::<u64>() % sleep_count);
-
-        if sleep_count > 5000 {
-            sleep_count = 5000;
-        }
-
-        debug!(
-            "Database is locked; sleeping {}ms and trying again",
-            &sleep_count
-        );
-
-        thread::sleep(time::Duration::from_millis(sleep_count));
-        true
-    }
-}
-
 /// Write any `serde_json` object directly to a file
 pub fn serialize_json_to_file<J, P>(json: &J, path: P) -> Result<(), std::io::Error>
 where
diff --git a/stackslib/src/util_lib/db.rs b/stackslib/src/util_lib/db.rs
index 4728b83e73..a0496d3bfc 100644
--- a/stackslib/src/util_lib/db.rs
+++ b/stackslib/src/util_lib/db.rs
@@ -657,43 +657,7 @@ impl<'a, C: Clone, T: MarfTrieId> DerefMut for IndexDBTx<'a, C, T> {
 
 /// Called by `rusqlite` if we are waiting too long on a database lock
 pub fn tx_busy_handler(run_count: i32) -> bool {
-    const TIMEOUT: Duration = Duration::from_secs(60);
-    const AVG_SLEEP_TIME_MS: u64 = 100;
-
-    // First, check if this is taking unreasonably long. If so, it's probably a deadlock
-    let run_count = run_count.unsigned_abs();
-    let approx_time_elapsed =
-        Duration::from_millis(AVG_SLEEP_TIME_MS.saturating_mul(u64::from(run_count)));
-    if approx_time_elapsed > TIMEOUT {
-        error!("Probable deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs();
-            "run_count" => run_count,
-            "backtrace" => ?Backtrace::capture()
-        );
-        return false;
-    }
-
-    let mut sleep_time_ms = 2u64.saturating_pow(run_count);
-
-    sleep_time_ms = sleep_time_ms.saturating_add(thread_rng().gen_range(0..sleep_time_ms));
-
-    if sleep_time_ms > AVG_SLEEP_TIME_MS {
-        let jitter = 10;
-        sleep_time_ms =
-            thread_rng().gen_range((AVG_SLEEP_TIME_MS - jitter)..(AVG_SLEEP_TIME_MS + jitter));
-    }
-
-    let msg = format!("Database is locked; sleeping {sleep_time_ms}ms and trying again");
-    if run_count > 10 && run_count % 10 == 0 {
-        warn!("{msg}";
-            "run_count" => run_count,
-            "backtrace" => ?Backtrace::capture()
-        );
-    } else {
-        debug!("{msg}");
-    }
-
-    sleep_ms(sleep_time_ms);
-    true
+    stacks_common::util::db::tx_busy_handler(run_count)
 }
 
 /// Begin an immediate-mode transaction, and handle busy errors with exponential backoff.

From 021ab583d2bbfde9e8e93de5b6674a7bc62d6091 Mon Sep 17 00:00:00 2001
From: Jeff Bencin <jeff.bencin@gmail.com>
Date: Wed, 11 Sep 2024 12:31:31 -0400
Subject: [PATCH 10/13] chore: Add `LOCK_TABLE` to print out all DB locks if
 deadlock detected

---
 stacks-common/src/util/db.rs | 29 ++++++++++++++++++++++++-----
 stackslib/src/util_lib/db.rs | 10 ++++++++--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/stacks-common/src/util/db.rs b/stacks-common/src/util/db.rs
index 29ebdd0bb0..fff39a3297 100644
--- a/stacks-common/src/util/db.rs
+++ b/stacks-common/src/util/db.rs
@@ -15,16 +15,32 @@
 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 use std::backtrace::Backtrace;
-use std::time::Duration;
+use std::sync::{LazyLock, Mutex};
+use std::thread;
+use std::time::{Duration, Instant};
 
+use hashbrown::HashMap;
 use rand::{thread_rng, Rng};
 
 use crate::util::sleep_ms;
 
+/// Keep track of DB locks, for deadlock debugging
+///  - **key:** `rusqlite::Connection` debug print
+///  - **value:** Lock holder (thread name + timestamp)
+///
+/// This uses a `Mutex` inside of `LazyLock` because:
+///  - Using `Mutex` alone, it can't be statically initialized because `HashMap::new()` isn't `const`
+///  - Using `LazyLock` alone doesn't allow interior mutability
+pub static LOCK_TABLE: LazyLock<Mutex<HashMap<String, String>>> =
+    LazyLock::new(|| Mutex::new(HashMap::new()));
+/// Generate timestanps for use in `LOCK_TABLE`
+/// `Instant` is preferable to `SystemTime` because it uses `CLOCK_MONOTONIC` and is not affected by NTP adjustments
+pub static LOCK_TABLE_TIMER: LazyLock<Instant> = LazyLock::new(Instant::now);
+
 /// Called by `rusqlite` if we are waiting too long on a database lock
-/// If called too many times, will fail to avoid deadlocks
+/// If called too many times, will assume a deadlock and panic
 pub fn tx_busy_handler(run_count: i32) -> bool {
-    const TIMEOUT: Duration = Duration::from_secs(60);
+    const TIMEOUT: Duration = Duration::from_secs(300);
     const AVG_SLEEP_TIME_MS: u64 = 100;
 
     // First, check if this is taking unreasonably long. If so, it's probably a deadlock
@@ -32,11 +48,14 @@ pub fn tx_busy_handler(run_count: i32) -> bool {
     let approx_time_elapsed =
         Duration::from_millis(AVG_SLEEP_TIME_MS.saturating_mul(u64::from(run_count)));
     if approx_time_elapsed > TIMEOUT {
-        error!("Probable deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs();
+        error!("Deadlock detected. Waited {} seconds (estimated) for database lock. Giving up", approx_time_elapsed.as_secs();
             "run_count" => run_count,
             "backtrace" => ?Backtrace::capture()
         );
-        return false;
+        for (k, v) in LOCK_TABLE.lock().unwrap().iter() {
+            error!("Database '{k}' last locked by {v}");
+        }
+        panic!("Deadlock in thread {:?}", thread::current().name());
     }
 
     let mut sleep_time_ms = 2u64.saturating_pow(run_count);
diff --git a/stackslib/src/util_lib/db.rs b/stackslib/src/util_lib/db.rs
index a0496d3bfc..70850d372c 100644
--- a/stackslib/src/util_lib/db.rs
+++ b/stackslib/src/util_lib/db.rs
@@ -18,7 +18,7 @@ use std::backtrace::Backtrace;
 use std::io::Error as IOError;
 use std::ops::{Deref, DerefMut};
 use std::path::{Path, PathBuf};
-use std::time::Duration;
+use std::time::{Duration, SystemTime};
 use std::{error, fmt, fs, io};
 
 use clarity::vm::types::QualifiedContractIdentifier;
@@ -32,6 +32,7 @@ use serde_json::Error as serde_error;
 use stacks_common::types::chainstate::{SortitionId, StacksAddress, StacksBlockId, TrieHash};
 use stacks_common::types::sqlite::NO_PARAMS;
 use stacks_common::types::Address;
+use stacks_common::util::db::{LOCK_TABLE, LOCK_TABLE_TIMER};
 use stacks_common::util::hash::to_hex;
 use stacks_common::util::secp256k1::{Secp256k1PrivateKey, Secp256k1PublicKey};
 use stacks_common::util::sleep_ms;
@@ -673,7 +674,12 @@ pub fn tx_begin_immediate<'a>(conn: &'a mut Connection) -> Result<DBTx<'a>, Erro
 /// Sames as `tx_begin_immediate` except that it returns a rusqlite error.
 pub fn tx_begin_immediate_sqlite<'a>(conn: &'a mut Connection) -> Result<DBTx<'a>, sqlite_error> {
     conn.busy_handler(Some(tx_busy_handler))?;
-    Transaction::new(conn, TransactionBehavior::Immediate)
+    let tx = Transaction::new(conn, TransactionBehavior::Immediate)?;
+    let time = LOCK_TABLE_TIMER.elapsed().as_millis();
+    let k = format!("{:?}", tx.deref());
+    let v = format!("{:?}@{time}", std::thread::current().name());
+    LOCK_TABLE.lock().unwrap().insert(k, v);
+    Ok(tx)
 }
 
 #[cfg(feature = "profile-sqlite")]

From 67d0f308edd39a7270ea327c92c510fe59fd27ff Mon Sep 17 00:00:00 2001
From: Jeff Bencin <jeff.bencin@gmail.com>
Date: Wed, 11 Sep 2024 12:45:16 -0400
Subject: [PATCH 11/13] refactor: Move logic for updating `LOCK_TABLE` into
 stacks-common

---
 stacks-common/src/util/db.rs | 14 ++++++++++++--
 stackslib/src/util_lib/db.rs |  7 ++-----
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/stacks-common/src/util/db.rs b/stacks-common/src/util/db.rs
index fff39a3297..257a98aab9 100644
--- a/stacks-common/src/util/db.rs
+++ b/stacks-common/src/util/db.rs
@@ -21,6 +21,7 @@ use std::time::{Duration, Instant};
 
 use hashbrown::HashMap;
 use rand::{thread_rng, Rng};
+use rusqlite::Connection;
 
 use crate::util::sleep_ms;
 
@@ -31,11 +32,20 @@ use crate::util::sleep_ms;
 /// This uses a `Mutex` inside of `LazyLock` because:
 ///  - Using `Mutex` alone, it can't be statically initialized because `HashMap::new()` isn't `const`
 ///  - Using `LazyLock` alone doesn't allow interior mutability
-pub static LOCK_TABLE: LazyLock<Mutex<HashMap<String, String>>> =
+static LOCK_TABLE: LazyLock<Mutex<HashMap<String, String>>> =
     LazyLock::new(|| Mutex::new(HashMap::new()));
 /// Generate timestanps for use in `LOCK_TABLE`
 /// `Instant` is preferable to `SystemTime` because it uses `CLOCK_MONOTONIC` and is not affected by NTP adjustments
-pub static LOCK_TABLE_TIMER: LazyLock<Instant> = LazyLock::new(Instant::now);
+static LOCK_TABLE_TIMER: LazyLock<Instant> = LazyLock::new(Instant::now);
+
+/// Call when using an operation which locks a database
+/// Updates `LOCK_TABLE`
+pub fn update_lock_table(conn: &Connection) {
+    let timestamp = LOCK_TABLE_TIMER.elapsed().as_millis();
+    let k = format!("{conn:?}");
+    let v = format!("{:?}@{timestamp}", std::thread::current().name());
+    LOCK_TABLE.lock().unwrap().insert(k, v);
+}
 
 /// Called by `rusqlite` if we are waiting too long on a database lock
 /// If called too many times, will assume a deadlock and panic
diff --git a/stackslib/src/util_lib/db.rs b/stackslib/src/util_lib/db.rs
index 70850d372c..53f597daa2 100644
--- a/stackslib/src/util_lib/db.rs
+++ b/stackslib/src/util_lib/db.rs
@@ -32,7 +32,7 @@ use serde_json::Error as serde_error;
 use stacks_common::types::chainstate::{SortitionId, StacksAddress, StacksBlockId, TrieHash};
 use stacks_common::types::sqlite::NO_PARAMS;
 use stacks_common::types::Address;
-use stacks_common::util::db::{LOCK_TABLE, LOCK_TABLE_TIMER};
+use stacks_common::util::db::update_lock_table;
 use stacks_common::util::hash::to_hex;
 use stacks_common::util::secp256k1::{Secp256k1PrivateKey, Secp256k1PublicKey};
 use stacks_common::util::sleep_ms;
@@ -675,10 +675,7 @@ pub fn tx_begin_immediate<'a>(conn: &'a mut Connection) -> Result<DBTx<'a>, Erro
 pub fn tx_begin_immediate_sqlite<'a>(conn: &'a mut Connection) -> Result<DBTx<'a>, sqlite_error> {
     conn.busy_handler(Some(tx_busy_handler))?;
     let tx = Transaction::new(conn, TransactionBehavior::Immediate)?;
-    let time = LOCK_TABLE_TIMER.elapsed().as_millis();
-    let k = format!("{:?}", tx.deref());
-    let v = format!("{:?}@{time}", std::thread::current().name());
-    LOCK_TABLE.lock().unwrap().insert(k, v);
+    update_lock_table(tx.deref());
     Ok(tx)
 }
 

From 292cd8902286c3b9224d31c2050cff2de7049774 Mon Sep 17 00:00:00 2001
From: Jeff Bencin <jeff.bencin@gmail.com>
Date: Wed, 11 Sep 2024 12:50:04 -0400
Subject: [PATCH 12/13] chore: Add comment

---
 stacks-common/src/util/db.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/stacks-common/src/util/db.rs b/stacks-common/src/util/db.rs
index 257a98aab9..89fe4677c7 100644
--- a/stacks-common/src/util/db.rs
+++ b/stacks-common/src/util/db.rs
@@ -42,8 +42,9 @@ static LOCK_TABLE_TIMER: LazyLock<Instant> = LazyLock::new(Instant::now);
 /// Updates `LOCK_TABLE`
 pub fn update_lock_table(conn: &Connection) {
     let timestamp = LOCK_TABLE_TIMER.elapsed().as_millis();
+    // The debug format for `Connection` includes the path
     let k = format!("{conn:?}");
-    let v = format!("{:?}@{timestamp}", std::thread::current().name());
+    let v = format!("{:?}@{timestamp}", thread::current().name());
     LOCK_TABLE.lock().unwrap().insert(k, v);
 }
 

From c77c024db6192cf8bbbac99249fd985418d246d5 Mon Sep 17 00:00:00 2001
From: Brice Dobry <brice@hiro.so>
Date: Fri, 13 Sep 2024 10:52:47 -0400
Subject: [PATCH 13/13] chore: add info to signature gathering failure logs

---
 .../stacks-node/src/nakamoto_node/miner.rs    | 40 ++++++++++++-------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs
index c3a1baea60..ef2a5e609d 100644
--- a/testnet/stacks-node/src/nakamoto_node/miner.rs
+++ b/testnet/stacks-node/src/nakamoto_node/miner.rs
@@ -358,22 +358,32 @@ impl BlockMinerThread {
                     &mut attempts,
                 ) {
                     Ok(x) => x,
-                    Err(e) => {
-                        match e {
-                            NakamotoNodeError::StacksTipChanged => {
-                                info!("Stacks tip changed while waiting for signatures");
-                                return Err(e);
-                            }
-                            NakamotoNodeError::BurnchainTipChanged => {
-                                info!("Burnchain tip changed while waiting for signatures");
-                                return Err(e);
-                            }
-                            _ => {
-                                error!("Error while gathering signatures: {e:?}. Will try mining again.");
-                                continue;
-                            }
+                    Err(e) => match e {
+                        NakamotoNodeError::StacksTipChanged => {
+                            info!("Stacks tip changed while waiting for signatures";
+                                "signer_sighash" => %new_block.header.signer_signature_hash(),
+                                "block_height" => new_block.header.chain_length,
+                                "consensus_hash" => %new_block.header.consensus_hash,
+                            );
+                            return Err(e);
                         }
-                    }
+                        NakamotoNodeError::BurnchainTipChanged => {
+                            info!("Burnchain tip changed while waiting for signatures";
+                                "signer_sighash" => %new_block.header.signer_signature_hash(),
+                                "block_height" => new_block.header.chain_length,
+                                "consensus_hash" => %new_block.header.consensus_hash,
+                            );
+                            return Err(e);
+                        }
+                        _ => {
+                            error!("Error while gathering signatures: {e:?}. Will try mining again.";
+                                "signer_sighash" => %new_block.header.signer_signature_hash(),
+                                "block_height" => new_block.header.chain_length,
+                                "consensus_hash" => %new_block.header.consensus_hash,
+                            );
+                            continue;
+                        }
+                    },
                 };
 
                 new_block.header.signer_signature = signer_signature;