Skip to content

Commit dc1f546

Browse files
committed
HBASE-26556 IT and Chaos Monkey improvements (#3932)
Signed-off-by: Josh Elser <elserj@apache.org>
Reviewed-by: Tak Lon (Stephen) Wu <taklwu@apache.org>
(cherry picked from commit a36d41a)
1 parent ec3558d commit dc1f546

File tree

4 files changed

+155
-37
lines changed

4 files changed

+155
-37
lines changed

hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.hadoop.conf.Configured;
2828
import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
2929
import org.apache.hadoop.hbase.util.Pair;
30+
import org.apache.hadoop.hbase.util.ReflectionUtils;
3031
import org.apache.hadoop.hbase.util.RetryCounter;
3132
import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
3233
import org.apache.hadoop.hbase.util.RetryCounterFactory;
@@ -216,7 +217,7 @@ protected String findPidCommand(ServiceType service) {
216217
}
217218

218219
/**
 * Builds the shell pipeline used to send a signal to a service process: the output of
 * {@code findPidCommand(service)} is piped through {@code xargs} into {@code kill -s <signal>}.
 * In this hunk the removed line invoked {@code kill} directly, while the added line runs it
 * through {@code sudo}.
 * @param service the service type passed to {@code findPidCommand} to locate the process
 * @param signal the signal name handed to {@code kill -s}
 * @return the composed shell command string
 */
public String signalCommand(ServiceType service, String signal) {
219-
return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
220+
return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
220221
}
221222
}
222223

@@ -322,7 +323,10 @@ protected CommandProvider getCommandProvider(ServiceType service) throws IOExcep
322323
case ZOOKEEPER_SERVER:
323324
return new ZookeeperShellCommandProvider(getConf());
324325
default:
325-
return new HBaseShellCommandProvider(getConf());
326+
Class<? extends CommandProvider> provider = getConf()
327+
.getClass("hbase.it.clustermanager.hbase.command.provider",
328+
HBaseShellCommandProvider.class, CommandProvider.class);
329+
return ReflectionUtils.newInstance(provider, getConf());
326330
}
327331
}
328332

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.chaos.factories;
19+
20+
import java.lang.reflect.Constructor;
21+
import java.util.function.Function;
22+
23+
import org.apache.hadoop.hbase.TableName;
24+
import org.apache.hadoop.hbase.chaos.actions.Action;
25+
import org.slf4j.Logger;
26+
import org.slf4j.LoggerFactory;
27+
28+
public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {
29+
30+
private static final Logger LOG =
31+
LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class);
32+
33+
final static String HEAVY_ACTIONS = "heavy.actions";
34+
final static String TABLE_PARAM = "\\$table_name";
35+
36+
public enum SupportedTypes {
37+
FLOAT(p->Float.parseFloat(p)),
38+
LONG(p-> Long.parseLong(p)),
39+
INT(p-> Integer.parseInt(p)),
40+
TABLENAME(p-> TableName.valueOf(p));
41+
42+
final Function<String,Object> converter;
43+
44+
SupportedTypes(Function<String,Object> converter){
45+
this.converter = converter;
46+
}
47+
48+
Object convert(String param){
49+
return converter.apply(param);
50+
}
51+
}
52+
53+
@Override
54+
protected Action[] getHeavyWeightedActions() {
55+
String actions = this.properties.getProperty(HEAVY_ACTIONS);
56+
if(actions==null || actions.isEmpty()){
57+
return super.getHeavyWeightedActions();
58+
} else {
59+
try {
60+
String[] actionClasses = actions.split(";");
61+
Action[] heavyActions = new Action[actionClasses.length];
62+
for (int i = 0; i < actionClasses.length; i++) {
63+
heavyActions[i] = instantiateAction(actionClasses[i]);
64+
}
65+
LOG.info("Created actions {}", heavyActions);
66+
return heavyActions;
67+
} catch(Exception e) {
68+
LOG.error("Error trying to instantiate heavy actions. Returning null array.", e);
69+
}
70+
return null;
71+
}
72+
}
73+
74+
private Action instantiateAction(String actionString) throws Exception {
75+
final String packageName = "org.apache.hadoop.hbase.chaos.actions";
76+
String[] classAndParams = actionString.split("\\)")[0].split("\\(");
77+
String className = packageName + "." + classAndParams[0];
78+
String[] params = classAndParams[1].replaceAll(TABLE_PARAM,
79+
tableName.getNameAsString()).split(",");
80+
LOG.info("About to instantiate action class: {}; With constructor params: {}",
81+
className, params);
82+
Class<? extends Action> actionClass = (Class<? extends Action>)Class.forName(className);
83+
Constructor<? extends Action>[] constructors =
84+
(Constructor<? extends Action>[]) actionClass.getDeclaredConstructors();
85+
for(Constructor<? extends Action> c : constructors){
86+
if (c.getParameterCount() != params.length){
87+
continue;
88+
}
89+
Class[] paramTypes = c.getParameterTypes();
90+
Object[] constructorParams = new Object[paramTypes.length];
91+
for(int i=0; i<paramTypes.length; i++){
92+
constructorParams[i] = SupportedTypes.valueOf(paramTypes[i].getSimpleName().toUpperCase())
93+
.convert(params[i]);
94+
}
95+
return c.newInstance(constructorParams);
96+
}
97+
throw new IllegalArgumentException("Couldn't find any matching constructor for: " +
98+
actionString);
99+
}
100+
}

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyFactory.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ public MonkeyFactory setProperties(Properties props) {
7979
public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
8080
public static final String DISTRIBUTED_ISSUES = "distributedIssues";
8181
public static final String DATA_ISSUES = "dataIssues";
82+
public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";
8283

8384
public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
8485
.put(CALM, new CalmMonkeyFactory())
@@ -93,6 +94,7 @@ public MonkeyFactory setProperties(Properties props) {
9394
.put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
9495
.put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
9596
.put(DATA_ISSUES, new DataIssuesMonkeyFactory())
97+
.put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
9698
.build();
9799

98100
public static MonkeyFactory getFactory(String factoryName) {

hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -74,54 +74,66 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
7474
private long rollingBatchSuspendRSSleepTime;
7575
private float rollingBatchSuspendtRSRatio;
7676

77+
protected Action[] getLightWeightedActions(){
78+
return new Action[] {
79+
new CompactTableAction(tableName, compactTableRatio),
80+
new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
81+
new FlushTableAction(tableName),
82+
new FlushRandomRegionOfTableAction(tableName),
83+
new MoveRandomRegionOfTableAction(tableName)
84+
};
85+
}
86+
87+
protected Action[] getMidWeightedActions(){
88+
return new Action[] {
89+
new SplitRandomRegionOfTableAction(tableName),
90+
new MergeRandomAdjacentRegionsOfTableAction(tableName),
91+
new SnapshotTableAction(tableName),
92+
new AddColumnAction(tableName),
93+
new RemoveColumnAction(tableName, columnFamilies),
94+
new ChangeEncodingAction(tableName),
95+
new ChangeCompressionAction(tableName),
96+
new ChangeBloomFilterAction(tableName),
97+
new ChangeVersionsAction(tableName),
98+
new ChangeSplitPolicyAction(tableName),
99+
};
100+
}
101+
102+
protected Action[] getHeavyWeightedActions() {
103+
return new Action[] {
104+
new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
105+
tableName),
106+
new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
107+
new RestartRandomRsAction(restartRandomRSSleepTime),
108+
new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
109+
new RestartActiveMasterAction(restartActiveMasterSleepTime),
110+
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
111+
rollingBatchRestartRSRatio),
112+
new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
113+
new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
114+
new SplitAllRegionOfTableAction(tableName),
115+
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
116+
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
117+
rollingBatchSuspendtRSRatio)
118+
};
119+
}
120+
77121
@Override
78122
public ChaosMonkey build() {
79123

80124
loadProperties();
81125
// Actions such as compact/flush a table/region,
82126
// move one region around. They are not so destructive,
83127
// can be executed more frequently.
84-
Action[] actions1 = new Action[] {
85-
new CompactTableAction(tableName, compactTableRatio),
86-
new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
87-
new FlushTableAction(tableName),
88-
new FlushRandomRegionOfTableAction(tableName),
89-
new MoveRandomRegionOfTableAction(tableName)
90-
};
128+
Action[] actions1 = getLightWeightedActions();
91129

92130
// Actions such as split/merge/snapshot.
93131
// They should not cause data loss, or unreliability
94132
// such as region stuck in transition.
95-
Action[] actions2 = new Action[] {
96-
new SplitRandomRegionOfTableAction(tableName),
97-
new MergeRandomAdjacentRegionsOfTableAction(tableName),
98-
new SnapshotTableAction(tableName),
99-
new AddColumnAction(tableName),
100-
new RemoveColumnAction(tableName, columnFamilies),
101-
new ChangeEncodingAction(tableName),
102-
new ChangeCompressionAction(tableName),
103-
new ChangeBloomFilterAction(tableName),
104-
new ChangeVersionsAction(tableName),
105-
new ChangeSplitPolicyAction(tableName),
106-
};
133+
Action[] actions2 = getMidWeightedActions();
107134

108135
// Destructive actions to mess things around.
109-
Action[] actions3 = new Action[] {
110-
new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
111-
tableName),
112-
new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
113-
new RestartRandomRsAction(restartRandomRSSleepTime),
114-
new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
115-
new RestartActiveMasterAction(restartActiveMasterSleepTime),
116-
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
117-
rollingBatchRestartRSRatio),
118-
new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
119-
new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
120-
new SplitAllRegionOfTableAction(tableName),
121-
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
122-
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
123-
rollingBatchSuspendtRSRatio)
124-
};
136+
Action[] actions3 = getHeavyWeightedActions();
125137

126138
// Action to log more info for debugging
127139
Action[] actions4 = new Action[] {

0 commit comments

Comments
 (0)