Skip to content

Commit

Permalink
[Raft] make me crazy (vesoft-inc#3172)
Browse files Browse the repository at this point in the history
* cherry

* minor log changes; fix a part being reset more often than expected while waiting for a snapshot

* optimize when rpc timeout

* * no more weird "has been sended" and "self->lastLogIdSent_ = self->logIdToSend_ - 1;"
* do not send pending wals when leader is sending a snapshot
* unify E_LOG_GAP and E_LOG_STALE

* wal_not_found

* * getPendingReqIfAny
* unify SUCCEEDED E_LOG_GAP E_LOG_STALE

* election doesn't wait forever

* handle election response in worker thread

* fix deadlock

* resolve conflicts, simplify, add a bit more comments

* fix pytest

* fix pytest again

Co-authored-by: yaphet <4414314+darionyaphet@users.noreply.github.com>
  • Loading branch information
critical27 and darionyaphet authored Nov 18, 2021
1 parent 07d5e21 commit 9ebc49d
Show file tree
Hide file tree
Showing 18 changed files with 385 additions and 539 deletions.
2 changes: 1 addition & 1 deletion conf/nebula-storaged.conf.production
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
--enable_rocksdb_whole_key_filtering=false

############### misc ####################
--snapshot_part_rate_limit=8388608
--snapshot_part_rate_limit=10485760
--snapshot_batch_size=1048576
--rebuild_index_part_rate_limit=4194304
--rebuild_index_batch_size=1048576
2 changes: 1 addition & 1 deletion src/common/base/SlowOpTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
#include "common/base/Base.h"
#include "common/time/WallClock.h"

DEFINE_int64(slow_op_threshhold_ms, 50, "default threshhold for slow operation");
DEFINE_int64(slow_op_threshhold_ms, 100, "default threshhold for slow operation");
40 changes: 18 additions & 22 deletions src/interface/raftex.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,27 @@ cpp_include "common/thrift/ThriftTypes.h"
enum ErrorCode {
SUCCEEDED = 0;

E_LOG_GAP = -1;
E_LOG_STALE = -2;
E_MISSING_COMMIT = -3;
E_WAITING_SNAPSHOT = -4; // The follower is waiting a snapshot

E_UNKNOWN_PART = -5;
E_TERM_OUT_OF_DATE = -6;
E_LAST_LOG_TERM_TOO_OLD = -7;
E_BAD_STATE = -8;
E_WRONG_LEADER = -9;
E_WAL_FAIL = -10;
E_NOT_READY = -11;
E_UNKNOWN_PART = -1;

// Local errors
E_HOST_STOPPED = -12;
E_NOT_A_LEADER = -13;
E_HOST_DISCONNECTED = -14;
E_TOO_MANY_REQUESTS = -15;
E_PERSIST_SNAPSHOT_FAILED = -16;
// Raft consensus errors
E_LOG_GAP = -2;
E_LOG_STALE = -3;
E_TERM_OUT_OF_DATE = -4;

E_BAD_ROLE = -17,
// Raft state errors
E_WAITING_SNAPSHOT = -5; // The follower is waiting a snapshot
E_BAD_STATE = -6;
E_WRONG_LEADER = -7;
E_NOT_READY = -8;
E_BAD_ROLE = -9,

E_EXCEPTION = -20; // An thrift internal exception was thrown
// Local errors
E_WAL_FAIL = -10;
E_HOST_STOPPED = -11;
E_TOO_MANY_REQUESTS = -12;
E_PERSIST_SNAPSHOT_FAILED = -13;
E_RPC_EXCEPTION = -14; // A thrift internal exception was thrown
E_NO_WAL_FOUND = -15;
}

typedef i64 (cpp.type = "nebula::ClusterID") ClusterID
Expand Down Expand Up @@ -103,8 +101,6 @@ struct AppendLogRequest {
//
10: TermID log_term;
11: list<LogEntry> log_str_list;

12: bool sending_snapshot;
}


Expand Down
4 changes: 2 additions & 2 deletions src/kvstore/NebulaSnapshotManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "kvstore/RateLimiter.h"

DEFINE_uint32(snapshot_part_rate_limit,
1024 * 1024 * 8,
1024 * 1024 * 10,
"max bytes of pulling snapshot for each partition in one second");
DEFINE_uint32(snapshot_batch_size, 1024 * 512, "batch size for snapshot, in bytes");

Expand All @@ -21,7 +21,7 @@ const int32_t kReserveNum = 1024 * 4;

NebulaSnapshotManager::NebulaSnapshotManager(NebulaStore* kv) : store_(kv) {
// Snapshot rate is limited to FLAGS_snapshot_worker_threads * FLAGS_snapshot_part_rate_limit.
// So by default, the total send rate is limited to 4 * 8Mb = 32Mb.
// So by default, the total send rate is limited to 4 * 10 MB = 40 MB per second.
LOG(INFO) << "Send snapshot is rate limited to " << FLAGS_snapshot_part_rate_limit
<< " for each part by default";
}
Expand Down
Loading

0 comments on commit 9ebc49d

Please sign in to comment.