Skip to content

Commit 8585de6

Browse files
authored
fix a node can't accept more logs after receiving snapshot (#3909)
1 parent 80061f2 commit 8585de6

File tree

4 files changed

+40
-6
lines changed

4 files changed

+40
-6
lines changed

src/kvstore/raftex/RaftLogIterator.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace raftex {
1919
class RaftLogIterator final : public LogIterator {
2020
public:
2121
/**
22-
* @brief Construct a new raf log iterator
22+
* @brief Construct a new raft log iterator
2323
*
2424
* @param firstLogId First log id in iterator
2525
* @param logEntries Log entries from rpc request

src/kvstore/raftex/RaftPart.cpp

+33-3
Original file line numberDiff line numberDiff line change
@@ -1566,12 +1566,42 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
15661566
// previously in fact. There are two choices: ask leader to send logs after committedLogId_ or
15671567
// just do nothing.
15681568
if (req.get_last_log_id_sent() < committedLogId_ ||
1569-
wal_->lastLogId() < req.get_last_log_id_sent() ||
1570-
wal_->getLogTerm(req.get_last_log_id_sent()) != req.get_last_log_term_sent()) {
1569+
wal_->lastLogId() < req.get_last_log_id_sent()) {
1570+
// case 1 and case 2
1571+
resp.last_matched_log_id_ref() = committedLogId_;
1572+
resp.last_matched_log_term() = committedLogTerm_;
1573+
resp.error_code() = nebula::cpp2::ErrorCode::E_RAFT_LOG_GAP;
1574+
return;
1575+
}
1576+
auto prevLogTerm = wal_->getLogTerm(req.get_last_log_id_sent());
1577+
if (UNLIKELY(prevLogTerm == FileBasedWal::INVALID_TERM)) {
1578+
/*
1579+
At this point, the condition below holds:
1580+
committedLogId <= req.get_last_log_id_sent() <= wal_->lastLogId()
1581+
1582+
When INVALID_TERM is returned, we failed to find the log of req.get_last_log_id_sent()
1583+
in wal. This usually happens when the node has received a snapshot recently.
1584+
*/
1585+
if (req.get_last_log_id_sent() == committedLogId_ &&
1586+
req.get_last_log_term_sent() == committedLogTerm_) {
1587+
// Logs are matched at the log index of committedLogId_, and we could check the remaining wal if
1588+
// there are any.
1589+
// The first log of wal must be committedLogId_ + 1, it can't be 0 (no wal) as well
1590+
// because it has been checked by case 2
1591+
DCHECK(wal_->firstLogId() == committedLogId_ + 1);
1592+
} else {
1593+
// case 3: checked by committedLogId_ and committedLogTerm_
1594+
// When log is not matched, we just return committedLogId_ and committedLogTerm_ instead
1595+
resp.last_matched_log_id_ref() = committedLogId_;
1596+
resp.last_matched_log_term() = committedLogTerm_;
1597+
resp.error_code() = nebula::cpp2::ErrorCode::E_RAFT_LOG_GAP;
1598+
return;
1599+
}
1600+
} else if (prevLogTerm != req.get_last_log_term_sent()) {
1601+
// case 3
15711602
resp.last_matched_log_id_ref() = committedLogId_;
15721603
resp.last_matched_log_term() = committedLogTerm_;
15731604
resp.error_code() = nebula::cpp2::ErrorCode::E_RAFT_LOG_GAP;
1574-
// lastMatchedLogId is committedLogId_
15751605
return;
15761606
}
15771607

src/kvstore/wal/FileBasedWal.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ size_t FileBasedWal::accessAllWalInfo(std::function<bool(WalFileInfoPtr info)> f
718718
}
719719

720720
TermID FileBasedWal::getLogTerm(LogID id) {
721-
TermID term = -1;
721+
TermID term = INVALID_TERM;
722722
auto iter = iterator(id, id);
723723
if (iter->valid()) {
724724
term = iter->logTerm();

src/kvstore/wal/FileBasedWal.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this<FileB
4949
friend class WalFileIterator;
5050

5151
public:
52+
/**
53+
* @brief Invalid term, returned by getLogTerm when the requested log is not found in the wal
54+
*/
55+
static constexpr TermID INVALID_TERM{-1};
56+
5257
// A factory method to create a new WAL
5358
/**
5459
* @brief Build the file based wal
@@ -110,7 +115,6 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this<FileB
110115
*/
111116
bool appendLog(LogID id, TermID term, ClusterID cluster, std::string msg) override;
112117

113-
//
114118
/**
115119
* @brief Append a list of log messages to the WAL. This method **IS NOT** thread-safe. We **DO
116120
* NOT** expect multiple threads will append logs simultaneously

0 commit comments

Comments
 (0)