Skip to content

Commit a28aec8

Browse files
committed
HBASE-22737 Add a new admin method and shell cmd to trigger the hbck chore to run (#425)
Signed-off-by: stack <stack@apache.org>
1 parent 0e02cee commit a28aec8

File tree

13 files changed

+177
-58
lines changed

13 files changed

+177
-58
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,24 @@
2424

2525
import org.apache.hadoop.conf.Configuration;
2626
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
27+
import org.apache.yetus.audience.InterfaceAudience;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
30+
2731
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
2832
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
2933
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
30-
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
34+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.AssignsResponse;
35+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureRequest;
36+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureResponse;
3137
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetTableStateResponse;
3238
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckService.BlockingInterface;
33-
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
34-
import org.apache.yetus.audience.InterfaceAudience;
35-
import org.slf4j.Logger;
36-
import org.slf4j.LoggerFactory;
39+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreRequest;
40+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreResponse;
41+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ScheduleServerCrashProcedureResponse;
42+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.UnassignsResponse;
3743

44+
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
3845

3946
/**
4047
* Use {@link ClusterConnection#getHbck()} to obtain an instance of {@link Hbck} instead of
@@ -103,9 +110,8 @@ public TableState setTableStateInMeta(TableState state) throws IOException {
103110
public List<Long> assigns(List<String> encodedRegionNames, boolean override)
104111
throws IOException {
105112
try {
106-
MasterProtos.AssignsResponse response =
107-
this.hbck.assigns(rpcControllerFactory.newController(),
108-
RequestConverter.toAssignRegionsRequest(encodedRegionNames, override));
113+
AssignsResponse response = this.hbck.assigns(rpcControllerFactory.newController(),
114+
RequestConverter.toAssignRegionsRequest(encodedRegionNames, override));
109115
return response.getPidList();
110116
} catch (ServiceException se) {
111117
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -117,9 +123,8 @@ public List<Long> assigns(List<String> encodedRegionNames, boolean override)
117123
public List<Long> unassigns(List<String> encodedRegionNames, boolean override)
118124
throws IOException {
119125
try {
120-
MasterProtos.UnassignsResponse response =
121-
this.hbck.unassigns(rpcControllerFactory.newController(),
122-
RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override));
126+
UnassignsResponse response = this.hbck.unassigns(rpcControllerFactory.newController(),
127+
RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override));
123128
return response.getPidList();
124129
} catch (ServiceException se) {
125130
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -135,13 +140,13 @@ private static String toCommaDelimitedString(List<String> list) {
135140
public List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override,
136141
boolean recursive)
137142
throws IOException {
138-
MasterProtos.BypassProcedureResponse response = ProtobufUtil.call(
139-
new Callable<MasterProtos.BypassProcedureResponse>() {
143+
BypassProcedureResponse response = ProtobufUtil.call(
144+
new Callable<BypassProcedureResponse>() {
140145
@Override
141-
public MasterProtos.BypassProcedureResponse call() throws Exception {
146+
public BypassProcedureResponse call() throws Exception {
142147
try {
143148
return hbck.bypassProcedure(rpcControllerFactory.newController(),
144-
MasterProtos.BypassProcedureRequest.newBuilder().addAllProcId(pids).
149+
BypassProcedureRequest.newBuilder().addAllProcId(pids).
145150
setWaitTime(waitTime).setOverride(override).setRecursive(recursive).build());
146151
} catch (Throwable t) {
147152
LOG.error(pids.stream().map(i -> i.toString()).
@@ -157,7 +162,7 @@ public MasterProtos.BypassProcedureResponse call() throws Exception {
157162
public List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serverNames)
158163
throws IOException {
159164
try {
160-
MasterProtos.ScheduleServerCrashProcedureResponse response =
165+
ScheduleServerCrashProcedureResponse response =
161166
this.hbck.scheduleServerCrashProcedure(rpcControllerFactory.newController(),
162167
RequestConverter.toScheduleServerCrashProcedureRequest(serverNames));
163168
return response.getPidList();
@@ -169,4 +174,16 @@ public List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serv
169174
throw new IOException(se);
170175
}
171176
}
177+
178+
@Override
179+
public boolean runHbckChore() throws IOException {
180+
try {
181+
RunHbckChoreResponse response = this.hbck.runHbckChore(rpcControllerFactory.newController(),
182+
RunHbckChoreRequest.newBuilder().build());
183+
return response.getRan();
184+
} catch (ServiceException se) {
185+
LOG.debug("Failed to run HBCK chore", se);
186+
throw new IOException(se);
187+
}
188+
}
172189
}

hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,12 @@ List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override,
111111

112112
List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serverNames)
113113
throws IOException;
114+
115+
/**
116+
* Request HBCK chore to run at master side.
117+
*
118+
* @return <code>true</code> if HBCK chore ran, <code>false</code> if HBCK chore is already running
119+
* @throws IOException if a remote or network exception occurs
120+
*/
121+
boolean runHbckChore() throws IOException;
114122
}

hbase-protocol-shaded/src/main/protobuf/Master.proto

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,13 @@ message IsNormalizerEnabledResponse {
350350
required bool enabled = 1;
351351
}
352352

353+
message RunHbckChoreRequest {
354+
}
355+
356+
message RunHbckChoreResponse {
357+
required bool ran = 1;
358+
}
359+
353360
message RunCatalogScanRequest {
354361
}
355362

@@ -1080,4 +1087,10 @@ service HbckService {
10801087
/** Schedule a ServerCrashProcedure to help recover a crash server */
10811088
rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest)
10821089
returns(ScheduleServerCrashProcedureResponse);
1090+
1091+
/**
1092+
* Request HBCK chore to run at master side.
1093+
*/
1094+
rpc RunHbckChore(RunHbckChoreRequest)
1095+
returns(RunHbckChoreResponse);
10831096
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ public void run() {
354354
private ClusterStatusChore clusterStatusChore;
355355
private ClusterStatusPublisher clusterStatusPublisherChore = null;
356356

357-
private HbckChecker hbckChecker;
357+
private HbckChore hbckChore;
358358
CatalogJanitor catalogJanitorChore;
359359

360360
private ReplicationZKNodeCleanerChore replicationZKNodeCleanerChore;
@@ -992,8 +992,8 @@ private void finishActiveMasterInitialization(MonitoredTask status)
992992
getChoreService().scheduleChore(normalizerChore);
993993
this.catalogJanitorChore = new CatalogJanitor(this);
994994
getChoreService().scheduleChore(catalogJanitorChore);
995-
this.hbckChecker = new HbckChecker(this);
996-
getChoreService().scheduleChore(hbckChecker);
995+
this.hbckChore = new HbckChore(this);
996+
getChoreService().scheduleChore(hbckChore);
997997

998998
// NAMESPACE READ!!!!
999999
// Here we expect hbase:namespace to be online. See inside initClusterSchemaService.
@@ -1465,8 +1465,8 @@ private void stopChores() {
14651465
if (this.snapshotQuotaChore != null) {
14661466
snapshotQuotaChore.cancel();
14671467
}
1468-
if (this.hbckChecker != null) {
1469-
hbckChecker.cancel();
1468+
if (this.hbckChore != null) {
1469+
hbckChore.cancel();
14701470
}
14711471
}
14721472

@@ -3790,7 +3790,7 @@ public static void decorateMasterConfiguration(Configuration conf) {
37903790
}
37913791
}
37923792

3793-
public HbckChecker getHbckChecker() {
3794-
return this.hbckChecker;
3793+
public HbckChore getHbckChore() {
3794+
return this.hbckChore;
37953795
}
37963796
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java renamed to hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@
4747
*/
4848
@InterfaceAudience.Private
4949
@InterfaceStability.Evolving
50-
public class HbckChecker extends ScheduledChore {
51-
private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName());
50+
public class HbckChore extends ScheduledChore {
51+
private static final Logger LOG = LoggerFactory.getLogger(HbckChore.class.getName());
5252

53-
private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval";
54-
private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000;
53+
private static final String HBCK_CHORE_INTERVAL = "hbase.master.hbck.chore.interval";
54+
private static final int DEFAULT_HBCK_CHORE_INTERVAL = 60 * 60 * 1000;
5555

5656
private final MasterServices master;
5757

@@ -100,14 +100,14 @@ public class HbckChecker extends ScheduledChore {
100100
private volatile long checkingStartTimestamp = 0;
101101
private volatile long checkingEndTimestamp = 0;
102102

103-
public HbckChecker(MasterServices master) {
104-
super("HbckChecker-", master,
105-
master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
103+
public HbckChore(MasterServices master) {
104+
super("HbckChore-", master,
105+
master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL));
106106
this.master = master;
107107
}
108108

109109
@Override
110-
protected void chore() {
110+
protected synchronized void chore() {
111111
running = true;
112112
regionInfoMap.clear();
113113
orphanRegionsOnRS.clear();
@@ -277,6 +277,6 @@ public long getCheckingStartTimestamp() {
277277
* Used by the web UI to show when the HBCK checking report was generated.
278278
*/
279279
public long getCheckingEndTimestamp() {
280-
return this.checkingStartTimestamp;
280+
return this.checkingEndTimestamp;
281281
}
282282
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@
236236
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanResponse;
237237
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleanerChoreRequest;
238238
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleanerChoreResponse;
239+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreRequest;
240+
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreResponse;
239241
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesRequest;
240242
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesResponse;
241243
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetBalancerRunningRequest;
@@ -2252,6 +2254,20 @@ public ClearDeadServersResponse clearDeadServers(RpcController controller,
22522254

22532255
// HBCK Services
22542256

2257+
@Override
2258+
public RunHbckChoreResponse runHbckChore(RpcController c, RunHbckChoreRequest req)
2259+
throws ServiceException {
2260+
rpcPreCheck("runHbckChore");
2261+
LOG.info("{} request HBCK chore to run", master.getClientIdAuditPrefix());
2262+
HbckChore hbckChore = master.getHbckChore();
2263+
boolean ran = false;
2264+
if (!hbckChore.isRunning()) {
2265+
hbckChore.chore();
2266+
ran = true;
2267+
}
2268+
return RunHbckChoreResponse.newBuilder().setRan(ran).build();
2269+
}
2270+
22552271
/**
22562272
* Update state of the table in meta only. This is required by hbck in some situations to cleanup
22572273
* stuck assign/ unassign regions procedures for the table.

hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import="java.time.ZonedDateTime"
2828
import="java.time.format.DateTimeFormatter"
2929
%>
30-
<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
30+
<%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
3131
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
3232
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
3333
<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
@@ -38,18 +38,18 @@
3838
<%
3939
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
4040
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
41-
HbckChecker hbckChecker = master.getHbckChecker();
41+
HbckChore hbckChore = master.getHbckChore();
4242
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
4343
Map<String, ServerName> orphanRegionsOnRS = null;
4444
List<String> orphanRegionsOnFS = null;
4545
long startTimestamp = 0;
4646
long endTimestamp = 0;
47-
if (hbckChecker != null) {
48-
inconsistentRegions = hbckChecker.getInconsistentRegions();
49-
orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS();
50-
orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS();
51-
startTimestamp = hbckChecker.getCheckingStartTimestamp();
52-
endTimestamp = hbckChecker.getCheckingEndTimestamp();
47+
if (hbckChore != null) {
48+
inconsistentRegions = hbckChore.getInconsistentRegions();
49+
orphanRegionsOnRS = hbckChore.getOrphanRegionsOnRS();
50+
orphanRegionsOnFS = hbckChore.getOrphanRegionsOnFS();
51+
startTimestamp = hbckChore.getCheckingStartTimestamp();
52+
endTimestamp = hbckChore.getCheckingEndTimestamp();
5353
}
5454
ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp),
5555
ZoneId.systemDefault());

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.hadoop.hbase.HBaseTestingUtility;
2929
import org.apache.hadoop.hbase.ServerName;
3030
import org.apache.hadoop.hbase.TableName;
31+
import org.apache.hadoop.hbase.master.HMaster;
3132
import org.apache.hadoop.hbase.master.RegionState;
3233
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
3334
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
@@ -218,6 +219,20 @@ public void testScheduleSCP() throws Exception {
218219
LOG.info("pid is {}", pids.get(0));
219220
}
220221

222+
@Test
223+
public void testRunHbckChore() throws Exception {
224+
HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
225+
long endTimestamp = master.getHbckChore().getCheckingEndTimestamp();
226+
Hbck hbck = getHbck();
227+
boolean ran = false;
228+
while (!ran) {
229+
ran = hbck.runHbckChore();
230+
if (ran) {
231+
assertTrue(master.getHbckChore().getCheckingEndTimestamp() > endTimestamp);
232+
}
233+
}
234+
}
235+
221236
private void waitOnPids(List<Long> pids) {
222237
for (Long pid: pids) {
223238
while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().

0 commit comments

Comments
 (0)