Skip to content

Commit 1f9d748

Browse files
committed
fix a node can't accept more logs after receiving snapshot
1 parent 6975efd commit 1f9d748

File tree

4 files changed

+41
-6
lines changed

4 files changed

+41
-6
lines changed

src/kvstore/raftex/RaftLogIterator.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace raftex {
1919
class RaftLogIterator final : public LogIterator {
2020
public:
2121
/**
22-
* @brief Construct a new raf log iterator
22+
* @brief Construct a new raft log iterator
2323
*
2424
* @param firstLogId First log id in iterator
2525
* @param logEntries Log entries from rpc request

src/kvstore/raftex/RaftPart.cpp

+34-3
Original file line numberDiff line numberDiff line change
@@ -1545,12 +1545,43 @@ void RaftPart::processAppendLogRequest(const cpp2::AppendLogRequest& req,
15451545
// previously in fact. There are two choices: ask leader to send logs after committedLogId_ or
15461546
// just do nothing.
15471547
if (req.get_last_log_id_sent() < committedLogId_ ||
1548-
wal_->lastLogId() < req.get_last_log_id_sent() ||
1549-
wal_->getLogTerm(req.get_last_log_id_sent()) != req.get_last_log_term_sent()) {
1548+
wal_->lastLogId() < req.get_last_log_id_sent()) {
1549+
// case 1 and case 2
1550+
resp.last_matched_log_id_ref() = committedLogId_;
1551+
resp.last_matched_log_term() = committedLogTerm_;
1552+
resp.error_code() = nebula::cpp2::ErrorCode::E_RAFT_LOG_GAP;
1553+
return;
1554+
}
1555+
auto prevLogTerm = wal_->getLogTerm(req.get_last_log_id_sent());
1556+
if (UNLIKELY(prevLogTerm == FileBasedWal::INVALID_TERM)) {
1557+
/*
1558+
At this point, the condition below holds:
1559+
committedLogId <= req.get_last_log_id_sent() <= wal_->lastLogId()
1560+
1561+
When INVALID_TERM is returned, we failed to find the log of req.get_last_log_id_sent()
1562+
in wal. This usually happens when the node has received a snapshot recently, so the wal is
1563+
absent.
1564+
*/
1565+
if (req.get_last_log_id_sent() == committedLogId_ &&
1566+
req.get_last_log_term_sent() == committedLogTerm_) {
1567+
// Logs are matched at log index committedLogId_, and we could check the remaining wal if
1568+
// there are any.
1569+
// The first log of wal must be committedLogId_ + 1, it can't be 0 (no wal) as well
1570+
// because it has been checked by case 2
1571+
DCHECK(wal_->firstLogId() == committedLogId_ + 1);
1572+
} else {
1573+
// case 3: checked by committedLogId_ and committedLogTerm_
1574+
// When log is not matched, we just return committedLogId_ and committedLogTerm_ instead
1575+
resp.last_matched_log_id_ref() = committedLogId_;
1576+
resp.last_matched_log_term() = committedLogTerm_;
1577+
resp.error_code() = nebula::cpp2::ErrorCode::E_RAFT_LOG_GAP;
1578+
return;
1579+
}
1580+
} else if (prevLogTerm != req.get_last_log_term_sent()) {
1581+
// case 3
15501582
resp.last_matched_log_id_ref() = committedLogId_;
15511583
resp.last_matched_log_term() = committedLogTerm_;
15521584
resp.error_code() = nebula::cpp2::ErrorCode::E_RAFT_LOG_GAP;
1553-
// lastMatchedLogId is committedLogId_
15541585
return;
15551586
}
15561587

src/kvstore/wal/FileBasedWal.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ size_t FileBasedWal::accessAllWalInfo(std::function<bool(WalFileInfoPtr info)> f
711711
}
712712

713713
TermID FileBasedWal::getLogTerm(LogID id) {
714-
TermID term = -1;
714+
TermID term = INVALID_TERM;
715715
auto iter = iterator(id, id);
716716
if (iter->valid()) {
717717
term = iter->logTerm();

src/kvstore/wal/FileBasedWal.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this<FileB
4949
friend class WalFileIterator;
5050

5151
public:
52+
/**
53+
* @brief Invalid term when wal not found, used in getLogTerm
54+
*/
55+
static constexpr TermID INVALID_TERM{-1};
56+
5257
// A factory method to create a new WAL
5358
/**
5459
* @brief Build the file based wal
@@ -110,7 +115,6 @@ class FileBasedWal final : public Wal, public std::enable_shared_from_this<FileB
110115
*/
111116
bool appendLog(LogID id, TermID term, ClusterID cluster, std::string msg) override;
112117

113-
//
114118
/**
115119
* @brief Append a list of log messages to the WAL. This method **IS NOT** thread-safe. We **DO
116120
* NOT** expect multiple threads will append logs simultaneously

0 commit comments

Comments
 (0)