Skip to content

Commit d281d11

Browse files
committed
redis cluster del node: forget + shutdown
1 parent b203210 commit d281d11

File tree

7 files changed

+342
-5
lines changed

7 files changed

+342
-5
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package com.sohu.cache.constant;
2+
3+
4+
/**
 * Result of a Redis cluster management operation: a numeric status plus a human-readable message.
 *
 * <p>Instances are immutable. Use {@link #success()} and {@link #fail(String)} to create the two
 * standard outcomes; the public constructor remains available for arbitrary status codes.
 *
 * @author leifu
 * @Date 2017-06-27
 * @Time 08:43:10
 */
public class ClusterOperateResult {

    /** Status code carried by results created through {@link #success()}. */
    private static final int STATUS_SUCCESS = 1;

    /** Status code carried by results created through {@link #fail(String)}. */
    private static final int STATUS_FAIL = 0;

    private final int status;

    private final String message;

    /**
     * @param status  status code; only {@code 1} is reported as success by {@link #isSuccess()}
     * @param message description of the outcome, typically the failure reason
     */
    public ClusterOperateResult(int status, String message) {
        this.status = status;
        this.message = message;
    }

    /**
     * @return a successful result with an empty message
     */
    public static ClusterOperateResult success() {
        return new ClusterOperateResult(STATUS_SUCCESS, "");
    }

    /**
     * @param message reason of the failure
     * @return a failed result carrying the given message
     */
    public static ClusterOperateResult fail(String message) {
        return new ClusterOperateResult(STATUS_FAIL, message);
    }

    /**
     * @return {@code true} only when the status equals the success code
     */
    public boolean isSuccess() {
        return status == STATUS_SUCCESS;
    }

    public int getStatus() {
        return status;
    }

    public String getMessage() {
        return message;
    }

    @Override
    public String toString() {
        return "ClusterOperateResult [status=" + status + ", message=" + message + "]";
    }

}

cachecloud-open-web/src/main/java/com/sohu/cache/redis/RedisCenter.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,14 @@ public Map<RedisConstant, Map<String, Object>> collectRedisInfo(long appId, long
107107
* @return 主返回true,从返回false;
108108
*/
109109
public Boolean isMaster(long appId, String ip, int port);
110+
111+
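/**
 * Determines, by ip and port, whether the redis instance currently has slave nodes.
 *
 * @param appId application id
 * @param ip ip
 * @param port port
 * @return true if the instance has at least one slave, false if it has none;
 *         the implementation in RedisCenterImpl returns null when this cannot be determined
 */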
117+
public Boolean hasSlaves(long appId, String ip, int port);
110118

111119
/**
112120
* 获取从节点的主节点地址

cachecloud-open-web/src/main/java/com/sohu/cache/redis/RedisDeployCenter.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.util.List;
44

5+
import com.sohu.cache.constant.ClusterOperateResult;
56
import com.sohu.cache.entity.AppDesc;
67
import com.sohu.cache.web.enums.RedisOperateEnum;
78

@@ -122,6 +123,22 @@ public interface RedisDeployCenter {
122123
*/
123124
public boolean clusterFailover(long appId, int slaveInstanceId, String failoverParam) throws Exception;
124125

126+
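/**
 * Checks whether the instance meets the preconditions for being forgotten by the cluster.
 *
 * @param appId application id
 * @param forgetInstanceId id of the instance that is to be forgotten
 * @return a successful result when the forget may proceed, otherwise a failed result whose
 *         message gives the reason
 */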
132+
public ClusterOperateResult checkClusterForget(Long appId, int forgetInstanceId);
133+
134+
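/**
 * Deletes a node from the cluster.
 *
 * @param appId application id
 * @param delNodeInstanceId id of the instance that is to be deleted
 * @return a successful result when the node was deleted, otherwise a failed result whose
 *         message gives the reason
 */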
140+
public ClusterOperateResult delNode(Long appId, int delNodeInstanceId);
141+
125142

126143

127144
}

cachecloud-open-web/src/main/java/com/sohu/cache/redis/impl/RedisCenterImpl.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,27 @@ private Map<RedisConstant, Map<String, Object>> processRedisStats(String statRes
703703
}
704704
return redisStatMap;
705705
}
706+
707+
/**
708+
* 根据infoMap的结果判断实例的主从
709+
*
710+
* @param infoMap
711+
* @return
712+
*/
713+
private Boolean hasSlaves(Map<RedisConstant, Map<String, Object>> infoMap) {
714+
Map<String, Object> replicationMap = infoMap.get(RedisConstant.Replication);
715+
if (MapUtils.isEmpty(replicationMap)) {
716+
return null;
717+
}
718+
for (Entry<String, Object> entry : replicationMap.entrySet()) {
719+
String key = entry.getKey();
720+
//判断一个即可
721+
if (key != null && key.contains("slave0")) {
722+
return true;
723+
}
724+
}
725+
return false;
726+
}
706727

707728
/**
708729
* 根据infoMap的结果判断实例的主从
@@ -742,6 +763,26 @@ public Boolean isMaster(long appId, String ip, int port) {
742763
jedis.close();
743764
}
744765
}
766+
767+
/**
768+
* 根据ip和port判断redis实例当前是否有从节点
769+
* @param ip ip
770+
* @param port port
771+
* @return 主返回true,从返回false;
772+
*/
773+
public Boolean hasSlaves(long appId, String ip, int port) {
774+
Jedis jedis = getJedis(appId, ip, port, REDIS_DEFAULT_TIME, REDIS_DEFAULT_TIME);
775+
try {
776+
String info = jedis.info("all");
777+
Map<RedisConstant, Map<String, Object>> infoMap = processRedisStats(info);
778+
return hasSlaves(infoMap);
779+
} catch (Exception e) {
780+
logger.error(e.getMessage(), e);
781+
return null;
782+
} finally {
783+
jedis.close();
784+
}
785+
}
745786

746787
@Override
747788
public HostAndPort getMaster(String ip, int port, String password) {

cachecloud-open-web/src/main/java/com/sohu/cache/redis/impl/RedisDeployCenterImpl.java

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package com.sohu.cache.redis.impl;
22

3+
import com.sohu.cache.constant.ClusterOperateResult;
34
import com.sohu.cache.constant.InstanceStatusEnum;
45
import com.sohu.cache.dao.AppDao;
56
import com.sohu.cache.dao.InstanceDao;
67
import com.sohu.cache.dao.MachineDao;
78
import com.sohu.cache.entity.AppDesc;
89
import com.sohu.cache.entity.InstanceInfo;
10+
import com.sohu.cache.entity.InstanceSlotModel;
911
import com.sohu.cache.entity.MachineInfo;
1012
import com.sohu.cache.machine.MachineCenter;
1113
import com.sohu.cache.protocol.MachineProtocol;
@@ -15,6 +17,7 @@
1517
import com.sohu.cache.redis.RedisConfigTemplateService;
1618
import com.sohu.cache.redis.RedisDeployCenter;
1719
import com.sohu.cache.redis.enums.RedisConfigEnum;
20+
import com.sohu.cache.stats.instance.InstanceDeployCenter;
1821
import com.sohu.cache.util.ConstUtils;
1922
import com.sohu.cache.util.IdempotentConfirmer;
2023
import com.sohu.cache.util.TypeUtil;
@@ -51,6 +54,8 @@ public class RedisDeployCenterImpl implements RedisDeployCenter {
5154

5255
private RedisConfigTemplateService redisConfigTemplateService;
5356

57+
private InstanceDeployCenter instanceDeployCenter;
58+
5459
@Override
5560
public boolean deployClusterInstance(long appId, List<RedisClusterNode> clusterNodes, int maxMemory) {
5661
if (!isExist(appId)) {
@@ -975,7 +980,7 @@ public boolean clusterFailover(final long appId, int slaveInstanceId, final Stri
975980
Assert.isTrue(appDesc != null);
976981
int type = appDesc.getType();
977982
if (!TypeUtil.isRedisCluster(type)) {
978-
logger.error("{} is not redis type", appDesc);
983+
logger.error("{} is not redis cluster type", appDesc);
979984
return false;
980985
}
981986
InstanceInfo instanceInfo = instanceDao.getInstanceInfoById(slaveInstanceId);
@@ -1007,6 +1012,115 @@ public boolean execute() {
10071012
}
10081013
return true;
10091014
}
1015+
1016+
@Override
1017+
public ClusterOperateResult delNode(final Long appId, int delNodeInstanceId) {
1018+
final InstanceInfo forgetInstanceInfo = instanceDao.getInstanceInfoById(delNodeInstanceId);
1019+
final String forgetNodeId = redisCenter.getNodeId(appId, forgetInstanceInfo.getIp(),
1020+
forgetInstanceInfo.getPort());
1021+
if (StringUtils.isBlank(forgetNodeId)) {
1022+
logger.warn("{} nodeId is null", forgetInstanceInfo.getHostPort());
1023+
return ClusterOperateResult.fail(String.format("%s nodeId is null", forgetInstanceInfo.getHostPort()));
1024+
}
1025+
List<InstanceInfo> instanceInfos = instanceDao.getInstListByAppId(appId);
1026+
for (InstanceInfo instanceInfo : instanceInfos) {
1027+
if (instanceInfo == null) {
1028+
continue;
1029+
}
1030+
if (instanceInfo.isOffline()) {
1031+
continue;
1032+
}
1033+
// 过滤当前节点
1034+
if (forgetInstanceInfo.getHostPort().equals(instanceInfo.getHostPort())) {
1035+
continue;
1036+
}
1037+
final String instanceHost = instanceInfo.getIp();
1038+
final int instancePort = instanceInfo.getPort();
1039+
boolean isForget = new IdempotentConfirmer() {
1040+
@Override
1041+
public boolean execute() {
1042+
String response = null;
1043+
Jedis jedis = null;
1044+
try {
1045+
jedis = redisCenter.getJedis(appId, instanceHost, instancePort);
1046+
logger.warn("{}:{} is forgetting {}", instanceHost, instancePort, forgetNodeId);
1047+
response = jedis.clusterForget(forgetNodeId);
1048+
boolean success = response != null && response.equalsIgnoreCase("OK");
1049+
logger.warn("{}:{} is forgetting {} result is {}", instanceHost, instancePort, forgetNodeId,
1050+
success);
1051+
return success;
1052+
} catch (Exception e) {
1053+
logger.error(e.getMessage());
1054+
} finally {
1055+
if (jedis != null) {
1056+
jedis.close();
1057+
}
1058+
}
1059+
return response != null && response.equalsIgnoreCase("OK");
1060+
}
1061+
}.run();
1062+
if (!isForget) {
1063+
logger.warn("{}:{} forget {} failed", instanceHost, instancePort, forgetNodeId);
1064+
return ClusterOperateResult.fail(String.format("%s:%s forget %s failed", instanceHost, instancePort, forgetNodeId));
1065+
}
1066+
}
1067+
1068+
// shutdown
1069+
boolean isShutdown = instanceDeployCenter.shutdownExistInstance(appId, delNodeInstanceId);
1070+
if (!isShutdown) {
1071+
logger.warn("{} shutdown failed", forgetInstanceInfo.getHostPort());
1072+
return ClusterOperateResult.fail(String.format("%s shutdown failed", forgetInstanceInfo.getHostPort()));
1073+
}
1074+
1075+
return ClusterOperateResult.success();
1076+
}
1077+
1078+
1079+
/**
1080+
* 1. 被forget的节点必须在线(这个条件有待验证)
1081+
* 2. 被forget的节点不能有从节点
1082+
* 3. 被forget的节点不能有slots
1083+
*/
1084+
@Override
1085+
public ClusterOperateResult checkClusterForget(Long appId, int forgetInstanceId) {
1086+
// 0.各种验证
1087+
Assert.isTrue(appId > 0);
1088+
Assert.isTrue(forgetInstanceId > 0);
1089+
AppDesc appDesc = appDao.getAppDescById(appId);
1090+
Assert.isTrue(appDesc != null);
1091+
int type = appDesc.getType();
1092+
if (!TypeUtil.isRedisCluster(type)) {
1093+
logger.error("{} is not redis cluster type", appDesc);
1094+
return ClusterOperateResult.fail(String.format("instanceId: %s must be cluster type", forgetInstanceId));
1095+
}
1096+
InstanceInfo instanceInfo = instanceDao.getInstanceInfoById(forgetInstanceId);
1097+
Assert.isTrue(instanceInfo != null);
1098+
String forgetHost = instanceInfo.getIp();
1099+
int forgetPort = instanceInfo.getPort();
1100+
// 1.是否在线
1101+
boolean isRun = redisCenter.isRun(appId, forgetHost, forgetPort);
1102+
if (!isRun) {
1103+
logger.warn("{}:{} is not run", forgetHost, forgetPort);
1104+
return ClusterOperateResult.fail(String.format("被forget的节点(%s:%s)必须在线", forgetHost, forgetPort));
1105+
}
1106+
// 2.被forget的节点不能有从节点
1107+
Boolean hasSlaves = redisCenter.hasSlaves(appId, forgetHost, forgetPort);
1108+
if (hasSlaves == null || hasSlaves) {
1109+
logger.warn("{}:{} has slave", forgetHost, forgetPort);
1110+
return ClusterOperateResult.fail(String.format("被forget的节点(%s:%s)不能有从节点", forgetHost, forgetPort));
1111+
}
1112+
1113+
// 3.被forget的节点不能有slots
1114+
Map<String, InstanceSlotModel> clusterSlotsMap = redisCenter.getClusterSlotsMap(appId);
1115+
InstanceSlotModel instanceSlotModel = clusterSlotsMap.get(instanceInfo.getHostPort());
1116+
if (instanceSlotModel != null && instanceSlotModel.getSlotList() != null
1117+
&& instanceSlotModel.getSlotList().size() > 0) {
1118+
logger.warn("{}:{} has slots", forgetHost, forgetPort);
1119+
return ClusterOperateResult.fail(String.format("被forget的节点(%s:%s)不能持有slot", forgetHost, forgetPort));
1120+
}
1121+
1122+
return ClusterOperateResult.success();
1123+
}
10101124

10111125
/**
10121126
* 拷贝redis配置

cachecloud-open-web/src/main/java/com/sohu/cache/web/controller/AppManageController.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.sohu.cache.web.enums.RedisOperateEnum;
44
import com.sohu.cache.constant.AppCheckEnum;
5+
import com.sohu.cache.constant.ClusterOperateResult;
56
import com.sohu.cache.constant.DataFormatCheckResult;
67
import com.sohu.cache.constant.ErrorMessageEnum;
78
import com.sohu.cache.constant.HorizontalResult;
@@ -625,6 +626,46 @@ public ModelAndView appInfoAndAudit(HttpServletRequest request, HttpServletRespo
625626
}
626627
return new ModelAndView("manage/appOps/appInfoAndAudit");
627628
}
629+
630+
/**
631+
* redisCluster节点删除: forget + shutdown
632+
*
633+
* @param appId 应用id
634+
* @param forgetInstanceId 需要被forget的节点
635+
* @return
636+
*/
637+
@RequestMapping("/clusterDelNode")
638+
public ModelAndView clusterDelNode(HttpServletRequest request, HttpServletResponse response, Model model, Long appId,
639+
int delNodeInstanceId) {
640+
AppUser appUser = getUserInfo(request);
641+
logger.warn("user {}, clusterForget: appId:{}, instanceId:{}", appUser.getName(), appId, delNodeInstanceId);
642+
// 检测forget条件
643+
ClusterOperateResult checkClusterForgetResult = null;
644+
try {
645+
checkClusterForgetResult = redisDeployCenter.checkClusterForget(appId, delNodeInstanceId);
646+
} catch (Exception e) {
647+
logger.error(e.getMessage(), e);
648+
}
649+
if (checkClusterForgetResult == null || !checkClusterForgetResult.isSuccess()) {
650+
model.addAttribute("success", checkClusterForgetResult.getStatus());
651+
model.addAttribute("message", checkClusterForgetResult.getMessage());
652+
return new ModelAndView("");
653+
}
654+
655+
// 执行delnode:forget + shutdown
656+
ClusterOperateResult delNodeResult = null;
657+
try {
658+
delNodeResult = redisDeployCenter.delNode(appId, delNodeInstanceId);
659+
} catch (Exception e) {
660+
logger.error(e.getMessage(), e);
661+
}
662+
model.addAttribute("success", delNodeResult.getStatus());
663+
model.addAttribute("message", delNodeResult.getMessage());
664+
logger.warn("user {}, clusterForget: appId:{}, instanceId:{}, result is {}", appUser.getName(), appId, delNodeInstanceId, delNodeResult.getStatus());
665+
666+
return new ModelAndView("");
667+
668+
}
628669

629670
/**
630671
* redisCluster从节点failover

0 commit comments

Comments
 (0)