From 40377fa522383ec73036ba07fe95a1e315e1c19e Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Jan 2014 15:06:01 +0100 Subject: [PATCH 1/6] Cluster: redis-trib set-timeout implemented. --- src/redis-trib.rb | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/redis-trib.rb b/src/redis-trib.rb index 81811f4b9..3b40b2f37 100755 --- a/src/redis-trib.rb +++ b/src/redis-trib.rb @@ -912,6 +912,34 @@ def delnode_cluster_cmd(argv,opt) node.r.shutdown end + def set_timeout_cluster_cmd(argv,opt) + timeout = argv[1].to_i + if timeout < 100 + puts "Setting a node timeout of less than 100 milliseconds is a bad idea." + exit 1 + end + + # Load cluster information + load_cluster_info_from_node(argv[0]) + ok_count = 0 + err_count = 0 + + # Set the new node timeout on every node and persist it with CONFIG REWRITE + xputs ">>> Reconfiguring node timeout in every cluster node..." + @nodes.each{|n| + begin + n.r.config("set","cluster-node-timeout",timeout) + n.r.config("rewrite") + ok_count += 1 + xputs "*** New timeout set for #{n}" + rescue => e + puts "ERR setting node-timeot for #{n}: #{e}" + err_count += 1 + end + } + xputs ">>> New node timeout set. #{ok_count} OK, #{err_count} ERR." 
+ end + def help_cluster_cmd(argv,opt) show_help exit 0 @@ -952,8 +980,9 @@ def parse_options(cmd) "check" => ["check_cluster_cmd", 2, "host:port"], "fix" => ["fix_cluster_cmd", 2, "host:port"], "reshard" => ["reshard_cluster_cmd", 2, "host:port"], - "addnode" => ["addnode_cluster_cmd", 3, "new_host:new_port existing_host:existing_port"], - "delnode" => ["delnode_cluster_cmd", 3, "host:port node_id"], + "add-node" => ["addnode_cluster_cmd", 3, "new_host:new_port existing_host:existing_port"], + "del-node" => ["delnode_cluster_cmd", 3, "host:port node_id"], + "set-timeout" => ["set_timeout_cluster_cmd", 3, "host:port milliseconds"], "help" => ["help_cluster_cmd", 1, "(show this help)"] } @@ -966,14 +995,14 @@ def show_help puts "Usage: redis-trib \n\n" COMMANDS.each{|k,v| o = "" - puts " #{k.ljust(10)} #{v[2]}" + puts " #{k.ljust(15)} #{v[2]}" if ALLOWED_OPTIONS[k] ALLOWED_OPTIONS[k].each{|optname,has_arg| - puts " --#{optname}" + (has_arg ? " " : "") + puts " --#{optname}" + (has_arg ? " " : "") } end } - puts "\nFor check, fix, reshard, delnode, you can specify host:port of any working node.\n" + puts "\nFor check, fix, reshard, del-node, set-timeout you can specify the host and port of any working node in the cluster.\n" end # Sanity check From 23f4e9f0d91ee5845a9edab298c4b4a3c00d5f07 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 25 Jan 2014 11:53:53 +0100 Subject: [PATCH 2/6] Don't log MONITOR clients as disconnecting slaves. 
--- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index bf16559ac..82799ab15 100644 --- a/src/networking.c +++ b/src/networking.c @@ -667,7 +667,7 @@ void freeClient(redisClient *c) { } /* Log link disconnection with slave */ - if (c->flags & REDIS_SLAVE) { + if ((c->flags & REDIS_SLAVE) && !(c->flags & REDIS_MONITOR)) { char ip[REDIS_IP_STR_LEN]; if (anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1) { From 72f1715e45c8e8418e158fb676b7e491d51ab9e7 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Jan 2014 10:10:56 +0100 Subject: [PATCH 3/6] Fixed inverted if condition in MISCONF error code path. --- src/redis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis.c b/src/redis.c index ef07d9b25..4805cdc80 100644 --- a/src/redis.c +++ b/src/redis.c @@ -2026,7 +2026,7 @@ int processCommand(redisClient *c) { if (server.stop_writes_on_bgsave_err && server.saveparamslen > 0 && server.lastbgsave_status == REDIS_ERR && - server.masterhost != NULL && + server.masterhost == NULL && (c->cmd->flags & REDIS_CMD_WRITE || c->cmd->proc == pingCommand)) { From 0b1b25c51cb6e4ed4469d413284d2a8a9051cf0d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Jan 2014 16:28:07 +0100 Subject: [PATCH 4/6] Cluster: introduced repl_offset fields in clusterNode. The two fields are used in order to remember the latest known replication offset and the time we received it from other slave nodes. This will be used by slaves in order to start the election procedure with a delay that is proportional to the rank of the slave among the other slaves for this master, when sorted by replication offset. Usually this allows the slave with the most up-to-date offset to win the election and replace the failing master in the cluster. 
--- src/cluster.c | 2 ++ src/cluster.h | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index c37608616..bde8624d9 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -440,6 +440,8 @@ clusterNode *createClusterNode(char *nodename, int flags) { node->port = 0; node->fail_reports = listCreate(); node->voted_time = 0; + node->repl_offset_time = 0; + node->repl_offset = 0; listSetFreeMethod(node->fail_reports,zfree); return node; } diff --git a/src/cluster.h b/src/cluster.h index faba13477..673643686 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -61,10 +61,12 @@ struct clusterNode { int numslaves; /* Number of slave nodes, if this is a master */ struct clusterNode **slaves; /* pointers to slave nodes */ struct clusterNode *slaveof; /* pointer to the master node */ - mstime_t ping_sent; /* Unix time we sent latest ping */ - mstime_t pong_received; /* Unix time we received the pong */ - mstime_t fail_time; /* Unix time when FAIL flag was set */ - mstime_t voted_time; /* Last time we voted for a slave of this master */ + mstime_t ping_sent; /* Unix time we sent latest ping */ + mstime_t pong_received; /* Unix time we received the pong */ + mstime_t fail_time; /* Unix time when FAIL flag was set */ + mstime_t voted_time; /* Last time we voted for a slave of this master */ + mstime_t repl_offset_time; /* Unix time we received offset for this node */ + long long repl_offset; /* Last known repl offset for this node. */ char ip[REDIS_IP_STR_LEN]; /* Latest known IP address of this node */ int port; /* Latest known port of this node */ clusterLink *link; /* TCP/IP link with this node */ From 8b32bd483aecb3271aa3527ac869b87342a8a9b6 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Jan 2014 16:34:23 +0100 Subject: [PATCH 5/6] Cluster: limit cluster.h to 80 cols. 
--- src/cluster.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index 673643686..5cbef33d3 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -175,8 +175,9 @@ typedef struct { uint16_t type; /* Message type */ uint16_t count; /* Only used for some kind of messages. */ uint64_t currentEpoch; /* The epoch accordingly to the sending node. */ - uint64_t configEpoch; /* The config epoch if it's a master, or the last epoch - advertised by its master if it is a slave. */ + uint64_t configEpoch; /* The config epoch if it's a master, or the last + epoch advertised by its master if it is a + slave. */ char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */ unsigned char myslots[REDIS_CLUSTER_SLOTS/8]; char slaveof[REDIS_CLUSTER_NAMELEN]; @@ -190,7 +191,7 @@ typedef struct { #define CLUSTERMSG_MIN_LEN (sizeof(clusterMsg)-sizeof(union clusterMsgData)) -/* ----------------------- API exported outside cluster.c ------------------------- */ +/* ---------------------- API exported outside cluster.c -------------------- */ clusterNode *getNodeByQuery(redisClient *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask); #endif /* __REDIS_CLUSTER_H */ From befcf6259e1596661a454d1a1013e84178ac9426 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Jan 2014 16:51:50 +0100 Subject: [PATCH 6/6] Cluster: broadcast master/slave replication offset in bus header. 
--- src/cluster.c | 33 +++++++++++++++++++++++---------- src/cluster.h | 2 ++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index bde8624d9..4700cd30b 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1615,34 +1615,47 @@ void clusterBroadcastMessage(void *buf, size_t len) { /* Build the message header */ void clusterBuildMessageHdr(clusterMsg *hdr, int type) { int totlen = 0; - clusterNode *master; + uint64_t offset; + clusterNode *master, *myself = server.cluster->myself; /* If this node is a master, we send its slots bitmap and configEpoch. * If this node is a slave we send the master's information instead (the * node is flagged as slave so the receiver knows that it is NOT really * in charge for this slots. */ - master = (server.cluster->myself->flags & REDIS_NODE_SLAVE && - server.cluster->myself->slaveof) ? - server.cluster->myself->slaveof : server.cluster->myself; + master = (myself->flags & REDIS_NODE_SLAVE && myself->slaveof) ? + myself->slaveof : myself; memset(hdr,0,sizeof(*hdr)); hdr->type = htons(type); - memcpy(hdr->sender,server.cluster->myself->name,REDIS_CLUSTER_NAMELEN); + memcpy(hdr->sender,myself->name,REDIS_CLUSTER_NAMELEN); memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots)); memset(hdr->slaveof,0,REDIS_CLUSTER_NAMELEN); - if (server.cluster->myself->slaveof != NULL) { - memcpy(hdr->slaveof,server.cluster->myself->slaveof->name, - REDIS_CLUSTER_NAMELEN); - } + if (myself->slaveof != NULL) + memcpy(hdr->slaveof,myself->slaveof->name, REDIS_CLUSTER_NAMELEN); hdr->port = htons(server.port); - hdr->flags = htons(server.cluster->myself->flags); + hdr->flags = htons(myself->flags); hdr->state = server.cluster->state; /* Set the currentEpoch and configEpochs. */ hdr->currentEpoch = htonu64(server.cluster->currentEpoch); hdr->configEpoch = htonu64(master->configEpoch); + /* Set the replication offset. 
*/ + if (myself->flags & REDIS_NODE_SLAVE) { + if (server.master) + offset = server.master->reploff; + else if (server.cached_master) + offset = server.cached_master->reploff; + else + offset = 0; + } else { + offset = server.master_repl_offset; + } + hdr->offset = htonu64(offset); + + /* Compute the message length for certain messages. For other messages + * this is up to the caller. */ if (type == CLUSTERMSG_TYPE_FAIL) { totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); totlen += sizeof(clusterMsgDataFail); diff --git a/src/cluster.h b/src/cluster.h index 5cbef33d3..2b8cf0e33 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -178,6 +178,8 @@ typedef struct { uint64_t configEpoch; /* The config epoch if it's a master, or the last epoch advertised by its master if it is a slave. */ + uint64_t offset; /* Master replication offset if node is a master or + processed replication offset if node is a slave. */ char sender[REDIS_CLUSTER_NAMELEN]; /* Name of the sender node */ unsigned char myslots[REDIS_CLUSTER_SLOTS/8]; char slaveof[REDIS_CLUSTER_NAMELEN];