Skip to content

Commit a0084f5

Browse files
codablockpanleone
authored andcommitted
Multiple fixes and optimizations for LLMQs and ChainLocks (#2724)
* Indicate success when signing was unnecessary * Fix typo in name of LLMQ_400_60 * Move RemoveAskFor call for CLSIGs into ProcessNewChainLock In case we got INV items for the same CLSIG that we recreated through HandleNewRecoveredSig, (re-)requesting of the CLSIG from other peers becomes unnecessary. * Move Cleanup() call in CChainLocksHandler::UpdatedBlockTip up We bail out early in a few situations from this method, so that Cleanup() might not be called while its at the bottom. * Bail out from CChainLocksHandler::UpdatedBlockTip if we already got the CLSIG * Call RemoveAskFor when QFCOMMITMENT was received Otherwise we might end up re-requesting it for a very long time when the commitment INV was received shortly before it got mined. * Call RemoveSigSharesForSession when a recovered sig is received Otherwise we end up with session data in node states lingering around until a fake "timeout" occurs (can be seen in the logs). * Better handling of false-positive conflicts in CSigningManager The old code was emitting a lot of messages in logs as it treated sigs for exactly the same session as a conflict. This commit fixes this by looking at the signHash before logging. Also handle a corner-case where a recovered sig might be deleted between the HasRecoveredSigForId and GetRecoveredSigById call. * Don't run into session timeout when sig shares come in slow Instead of just tracking when the first share was received, we now also track when the last (non-duplicate) share was received. Sessios will now timeout 5 minutes after the first share arrives, or 1 minute after the last one arrived.
1 parent 0613978 commit a0084f5

File tree

5 files changed

+87
-25
lines changed

5 files changed

+87
-25
lines changed

src/llmq/quorums_chainlocks.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,17 @@ void CChainLocksHandler::ProcessMessage(CNode* pfrom, const std::string& strComm
7777

7878
auto hash = ::SerializeHash(clsig);
7979

80-
{
81-
LOCK(cs_main);
82-
connman.RemoveAskFor(hash, MSG_CLSIG);
83-
}
84-
8580
ProcessNewChainLock(pfrom->GetId(), clsig, hash);
8681
}
8782
}
8883

8984
void CChainLocksHandler::ProcessNewChainLock(NodeId from, const llmq::CChainLockSig& clsig, const uint256& hash)
9085
{
86+
{
87+
LOCK(cs_main);
88+
g_connman->RemoveAskFor(hash, MSG_CLSIG);
89+
}
90+
9191
{
9292
LOCK(cs);
9393
if (!seenChainLocks.emplace(hash, GetTimeMillis()).second) {
@@ -190,6 +190,8 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl
190190
return;
191191
}
192192

193+
Cleanup();
194+
193195
// DIP8 defines a process called "Signing attempts" which should run before the CLSIG is finalized
194196
// To simplify the initial implementation, we skip this process and directly try to create a CLSIG
195197
// This will fail when multiple blocks compete, but we accept this for the initial implementation.
@@ -201,6 +203,12 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl
201203
{
202204
LOCK(cs);
203205

206+
if (bestChainLockBlockIndex == pindexNew) {
207+
// we first got the CLSIG, then the header, and then the block was connected.
208+
// In this case there is no need to continue here.
209+
return;
210+
}
211+
204212
if (InternalHasConflictingChainLock(pindexNew->nHeight, pindexNew->GetBlockHash())) {
205213
if (!inEnforceBestChainLock) {
206214
// we accepted this block when there was no lock yet, but now a conflicting lock appeared. Invalidate it.
@@ -226,8 +234,6 @@ void CChainLocksHandler::UpdatedBlockTip(const CBlockIndex* pindexNew, const CBl
226234
}
227235

228236
quorumSigningManager->AsyncSignIfMember(Params().GetConsensus().llmqChainLocks, requestId, msgHash);
229-
230-
Cleanup();
231237
}
232238

233239
// WARNING: cs_main and cs should not be held!

src/llmq/quorums_init.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ void StartLLMQSystem()
5454
quorumDKGSessionManager->StartThreads();
5555
}
5656
if (quorumSigSharesManager) {
57+
quorumSigSharesManager->RegisterAsRecoveredSigsListener();
5758
quorumSigSharesManager->StartWorkerThread();
5859
}
5960
if (chainLocksHandler) {
@@ -68,6 +69,7 @@ void StopLLMQSystem()
6869
}
6970
if (quorumSigSharesManager) {
7071
quorumSigSharesManager->StopWorkerThread();
72+
quorumSigSharesManager->UnregisterAsRecoveredSigsListener();
7173
}
7274
if (quorumDKGSessionManager) {
7375
quorumDKGSessionManager->StopThreads();

src/llmq/quorums_signing.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -494,11 +494,25 @@ void CSigningManager::ProcessRecoveredSig(NodeId nodeId, const CRecoveredSig& re
494494
signHash.ToString(), recoveredSig.id.ToString(), recoveredSig.msgHash.ToString(), nodeId);
495495

496496
if (db.HasRecoveredSigForId(llmqType, recoveredSig.id)) {
497-
// this should really not happen, as each masternode is participating in only one vote,
498-
// even if it's a member of multiple quorums. so a majority is only possible on one quorum and one msgHash per id
499-
LogPrintf("CSigningManager::%s -- conflicting recoveredSig for id=%s, msgHash=%s\n", __func__,
500-
recoveredSig.id.ToString(), recoveredSig.msgHash.ToString());
501-
return;
497+
CRecoveredSig otherRecoveredSig;
498+
if (db.GetRecoveredSigById(llmqType, recoveredSig.id, otherRecoveredSig)) {
499+
auto otherSignHash = llmq::utils::BuildSignHash(recoveredSig);
500+
if (signHash != otherSignHash) {
501+
// this should really not happen, as each masternode is participating in only one vote,
502+
// even if it's a member of multiple quorums. so a majority is only possible on one quorum and one msgHash per id
503+
LogPrintf("CSigningManager::%s -- conflicting recoveredSig for signHash=%s, id=%s, msgHash=%s, otherSignHash=%s\n", __func__,
504+
signHash.ToString(), recoveredSig.id.ToString(), recoveredSig.msgHash.ToString(), otherSignHash.ToString());
505+
} else {
506+
// Looks like we're trying to process a recSig that is already known. This might happen if the same
507+
// recSig comes in through regular QRECSIG messages and at the same time through some other message
508+
// which allowed to reconstruct a recSig (e.g. IXLOCK). In this case, just bail out.
509+
}
510+
return;
511+
} else {
512+
// This case is very unlikely. It can only happen when cleanup caused this specific recSig to vanish
513+
// between the HasRecoveredSigForId and GetRecoveredSigById call. If that happens, treat it as if we
514+
// never had that recSig
515+
}
502516
}
503517

504518
db.WriteRecoveredSig(recoveredSig);
@@ -552,14 +566,19 @@ bool CSigningManager::AsyncSignIfMember(Consensus::LLMQType llmqType, const uint
552566
if (db.HasVotedOnId(llmqType, id)) {
553567
uint256 prevMsgHash;
554568
db.GetVoteForId(llmqType, id, prevMsgHash);
555-
LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting on conflicting msgHash=%s\n", __func__,
556-
id.ToString(), prevMsgHash.ToString(), msgHash.ToString());
569+
if (msgHash != prevMsgHash) {
570+
LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting on conflicting msgHash=%s\n", __func__,
571+
id.ToString(), prevMsgHash.ToString(), msgHash.ToString());
572+
} else {
573+
LogPrintf("CSigningManager::%s -- already voted for id=%s and msgHash=%s. Not voting again.\n", __func__,
574+
id.ToString(), prevMsgHash.ToString());
575+
}
557576
return false;
558577
}
559578

560579
if (db.HasRecoveredSigForId(llmqType, id)) {
561580
// no need to sign it if we already have a recovered sig
562-
return false;
581+
return true;
563582
}
564583
db.WriteVoteForId(llmqType, id, msgHash);
565584
}

src/llmq/quorums_signing_shares.cpp

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@
33
// Distributed under the MIT/X11 software license, see the accompanying
44
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
55

6-
#include "quorums_signing_shares.h"
6+
#include "quorums_signing.h"
7+
78
#include "activemasternode.h"
89
#include "bls/bls_batchverifier.h"
910
#include "cxxtimer.h"
1011
#include "init.h"
1112
#include "net.h"
1213
#include "net_processing.h"
1314
#include "netmessagemaker.h"
14-
#include "quorums_signing.h"
15+
#include "quorums_signing_shares.h"
1516
#include "quorums_utils.h"
1617
#include "random.h"
1718
#include "shutdown.h"
@@ -171,6 +172,16 @@ void CSigSharesManager::StopWorkerThread()
171172
}
172173
}
173174

175+
void CSigSharesManager::RegisterAsRecoveredSigsListener()
176+
{
177+
quorumSigningManager->RegisterRecoveredSigsListener(this);
178+
}
179+
180+
void CSigSharesManager::UnregisterAsRecoveredSigsListener()
181+
{
182+
quorumSigningManager->UnregisterRecoveredSigsListener(this);
183+
}
184+
174185
void CSigSharesManager::Interrupt()
175186
{
176187
interruptSigningShare();
@@ -548,7 +559,16 @@ void CSigSharesManager::ProcessSigShare(NodeId nodeId, const CSigShare& sigShare
548559
}
549560

550561
sigSharesToAnnounce.Add(sigShare.GetKey(), true);
551-
firstSeenForSessions.emplace(sigShare.GetSignHash(), GetTimeMillis());
562+
563+
auto it = timeSeenForSessions.find(sigShare.GetSignHash());
564+
if (it == timeSeenForSessions.end()) {
565+
auto t = GetTimeMillis();
566+
// insert first-seen and last-seen time
567+
timeSeenForSessions.emplace(sigShare.GetSignHash(), std::make_pair(t, t));
568+
} else {
569+
// update last-seen time
570+
it->second.second = GetTimeMillis();
571+
}
552572

553573
if (!quorumNodes.empty()) {
554574
// don't announce and wait for other nodes to request this share and directly send it to them
@@ -946,11 +966,12 @@ void CSigSharesManager::Cleanup()
946966

947967
// Remove sessions which timed out
948968
std::unordered_set<uint256> timeoutSessions;
949-
for (auto& p : firstSeenForSessions) {
969+
for (auto& p : timeSeenForSessions) {
950970
auto& signHash = p.first;
951-
int64_t time = p.second;
971+
int64_t firstSeenTime = p.second.first;
972+
int64_t lastSeenTime = p.second.second;
952973

953-
if (now - time >= SIGNING_SESSION_TIMEOUT) {
974+
if (now - firstSeenTime >= SESSION_TOTAL_TIMEOUT || now - lastSeenTime >= SESSION_NEW_SHARES_TIMEOUT) {
954975
timeoutSessions.emplace(signHash);
955976
}
956977
}
@@ -1032,7 +1053,7 @@ void CSigSharesManager::RemoveSigSharesForSession(const uint256& signHash)
10321053
sigSharesRequested.EraseAllForSignHash(signHash);
10331054
sigSharesToAnnounce.EraseAllForSignHash(signHash);
10341055
sigShares.EraseAllForSignHash(signHash);
1035-
firstSeenForSessions.erase(signHash);
1056+
timeSeenForSessions.erase(signHash);
10361057
}
10371058

10381059
void CSigSharesManager::RemoveBannedNodeStates()
@@ -1171,4 +1192,11 @@ void CSigSharesManager::Sign(const CQuorumCPtr& quorum, const uint256& id, const
11711192
sigShare.id.ToString(), sigShare.msgHash.ToString(), t.count());
11721193
ProcessSigShare(-1, sigShare, *g_connman, quorum);
11731194
}
1195+
1196+
void CSigSharesManager::HandleNewRecoveredSig(const llmq::CRecoveredSig& recoveredSig)
1197+
{
1198+
LOCK(cs);
1199+
RemoveSigSharesForSession(llmq::utils::BuildSignHash(recoveredSig));
1200+
}
1201+
11741202
} // namespace llmq

src/llmq/quorums_signing_shares.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -325,9 +325,10 @@ class CSigSharesNodeState
325325
void RemoveSession(const uint256& signHash);
326326
};
327327

328-
class CSigSharesManager
328+
class CSigSharesManager : public CRecoveredSigsListener
329329
{
330-
static const int64_t SIGNING_SESSION_TIMEOUT = 60 * 1000;
330+
static const int64_t SESSION_NEW_SHARES_TIMEOUT = 60 * 1000;
331+
static const int64_t SESSION_TOTAL_TIMEOUT = 5 * 60 * 1000;
331332
static const int64_t SIG_SHARE_REQUEST_TIMEOUT = 5 * 1000;
332333

333334
private:
@@ -337,7 +338,9 @@ class CSigSharesManager
337338
CThreadInterrupt interruptSigningShare;
338339

339340
SigShareMap<CSigShare> sigShares;
340-
std::unordered_map<uint256, int64_t> firstSeenForSessions;
341+
342+
// stores time of first and last receivedSigShare. Used to detect timeouts
343+
std::unordered_map<uint256, std::pair<int64_t, int64_t>> timeSeenForSessions;
341344

342345
std::unordered_map<NodeId, CSigSharesNodeState> nodeStates;
343346
SigShareMap<std::pair<NodeId, int64_t>> sigSharesRequested;
@@ -357,13 +360,17 @@ class CSigSharesManager
357360
void StartWorkerThread();
358361
void StopWorkerThread();
359362
void Interrupt();
363+
void RegisterAsRecoveredSigsListener();
364+
void UnregisterAsRecoveredSigsListener();
360365

361366
public:
362367
void ProcessMessage(CNode* pnode, const std::string& strCommand, CDataStream& vRecv, CConnman& connman);
363368

364369
void AsyncSign(const CQuorumCPtr& quorum, const uint256& id, const uint256& msgHash);
365370
void Sign(const CQuorumCPtr& quorum, const uint256& id, const uint256& msgHash);
366371

372+
void HandleNewRecoveredSig(const CRecoveredSig& recoveredSig);
373+
367374
private:
368375
void ProcessMessageSigSharesInv(CNode* pfrom, const CSigSharesInv& inv, CConnman& connman);
369376
void ProcessMessageGetSigShares(CNode* pfrom, const CSigSharesInv& inv, CConnman& connman);

0 commit comments

Comments
 (0)