hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java
@@ -34,10 +34,15 @@

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.AssignsResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BypassProcedureResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetTableStateResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.HbckService.BlockingInterface;

import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ScheduleServerCrashProcedureResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.UnassignsResponse;

/**
* Use {@link Connection#getHbck()} to obtain an instance of {@link Hbck} instead of
@@ -105,9 +110,8 @@ public TableState setTableStateInMeta(TableState state) throws IOException {
public List<Long> assigns(List<String> encodedRegionNames, boolean override)
throws IOException {
try {
MasterProtos.AssignsResponse response =
this.hbck.assigns(rpcControllerFactory.newController(),
RequestConverter.toAssignRegionsRequest(encodedRegionNames, override));
AssignsResponse response = this.hbck.assigns(rpcControllerFactory.newController(),
RequestConverter.toAssignRegionsRequest(encodedRegionNames, override));
return response.getPidList();
} catch (ServiceException se) {
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -119,9 +123,8 @@ public List<Long> assigns(List<String> encodedRegionNames, boolean override)
public List<Long> unassigns(List<String> encodedRegionNames, boolean override)
throws IOException {
try {
MasterProtos.UnassignsResponse response =
this.hbck.unassigns(rpcControllerFactory.newController(),
RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override));
UnassignsResponse response = this.hbck.unassigns(rpcControllerFactory.newController(),
RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override));
return response.getPidList();
} catch (ServiceException se) {
LOG.debug(toCommaDelimitedString(encodedRegionNames), se);
@@ -137,13 +140,13 @@ private static String toCommaDelimitedString(List<String> list) {
public List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override,
boolean recursive)
throws IOException {
MasterProtos.BypassProcedureResponse response = ProtobufUtil.call(
new Callable<MasterProtos.BypassProcedureResponse>() {
BypassProcedureResponse response = ProtobufUtil.call(
new Callable<BypassProcedureResponse>() {
@Override
public MasterProtos.BypassProcedureResponse call() throws Exception {
public BypassProcedureResponse call() throws Exception {
try {
return hbck.bypassProcedure(rpcControllerFactory.newController(),
MasterProtos.BypassProcedureRequest.newBuilder().addAllProcId(pids).
BypassProcedureRequest.newBuilder().addAllProcId(pids).
setWaitTime(waitTime).setOverride(override).setRecursive(recursive).build());
} catch (Throwable t) {
LOG.error(pids.stream().map(i -> i.toString()).
@@ -159,7 +162,7 @@ public MasterProtos.BypassProcedureResponse call() throws Exception {
public List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames)
throws IOException {
try {
MasterProtos.ScheduleServerCrashProcedureResponse response =
ScheduleServerCrashProcedureResponse response =
this.hbck.scheduleServerCrashProcedure(rpcControllerFactory.newController(),
RequestConverter.toScheduleServerCrashProcedureRequest(serverNames));
return response.getPidList();
@@ -171,4 +174,16 @@ public List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames)
throw new IOException(se);
}
}
}

@Override
public boolean runHbckChore() throws IOException {
try {
RunHbckChoreResponse response = this.hbck.runHbckChore(rpcControllerFactory.newController(),
RunHbckChoreRequest.newBuilder().build());
return response.getRan();
} catch (ServiceException se) {
LOG.debug("Failed to run HBCK chore", se);
throw new IOException(se);
}
}
}
hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java
@@ -121,4 +121,12 @@ default List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serverNames)
}

List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames) throws IOException;

/**
* Request the HBCK chore to run on the master side.
*
* @return <code>true</code> if the HBCK chore ran, <code>false</code> if it was already running
* @throws IOException if a remote or network exception occurs
*/
boolean runHbckChore() throws IOException;
}
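
Note: a minimal client-side sketch of the new API, assuming a reachable cluster and default configuration. The class and variable names below are illustrative, not part of this patch; the entry points (ConnectionFactory.createConnection, Connection#getHbck) are the existing client API, and Hbck is Closeable, so try-with-resources applies.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Hbck;

public class RunHbckChoreExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Hbck hbck = connection.getHbck()) {
      // Returns false when a chore run is already in progress on the master.
      boolean ran = hbck.runHbckChore();
      System.out.println("HBCK chore ran: " + ran);
    }
  }
}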
13 changes: 13 additions & 0 deletions hbase-protocol-shaded/src/main/protobuf/Master.proto
@@ -358,6 +358,13 @@ message IsNormalizerEnabledResponse {
required bool enabled = 1;
}

message RunHbckChoreRequest {
}

message RunHbckChoreResponse {
required bool ran = 1;
}

message RunCatalogScanRequest {
}

@@ -1138,4 +1145,10 @@ service HbckService {
/** Schedule a ServerCrashProcedure to help recover a crashed server */
rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest)
returns(ScheduleServerCrashProcedureResponse);

/**
* Request the HBCK chore to run on the master side.
*/
rpc RunHbckChore(RunHbckChoreRequest)
returns(RunHbckChoreResponse);
}
hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -385,7 +385,7 @@ public void run() {
private ClusterStatusPublisher clusterStatusPublisherChore = null;
private SnapshotCleanerChore snapshotCleanerChore = null;

private HbckChecker hbckChecker;
private HbckChore hbckChore;
CatalogJanitor catalogJanitorChore;
private LogCleaner logCleaner;
private HFileCleaner hfileCleaner;
@@ -1109,8 +1109,8 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOException
getChoreService().scheduleChore(normalizerChore);
this.catalogJanitorChore = new CatalogJanitor(this);
getChoreService().scheduleChore(catalogJanitorChore);
this.hbckChecker = new HbckChecker(this);
getChoreService().scheduleChore(hbckChecker);
this.hbckChore = new HbckChore(this);
getChoreService().scheduleChore(hbckChore);
this.serverManager.startChore();

// Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
@@ -1590,7 +1590,7 @@ private void stopChores() {
choreService.cancelChore(this.hfileCleaner);
choreService.cancelChore(this.replicationBarrierCleaner);
choreService.cancelChore(this.snapshotCleanerChore);
choreService.cancelChore(this.hbckChecker);
choreService.cancelChore(this.hbckChore);
}
}

@@ -3761,7 +3761,7 @@ public Map<String, ReplicationStatus> getWalGroupsReplicationStatus() {
return super.getWalGroupsReplicationStatus();
}

public HbckChecker getHbckChecker() {
return this.hbckChecker;
public HbckChore getHbckChore() {
return this.hbckChore;
}
}
hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChore.java (renamed from HbckChecker.java)
@@ -47,11 +47,11 @@
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class HbckChecker extends ScheduledChore {
private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName());
public class HbckChore extends ScheduledChore {
private static final Logger LOG = LoggerFactory.getLogger(HbckChore.class.getName());

private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval";
private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000;
private static final String HBCK_CHORE_INTERVAL = "hbase.master.hbck.chore.interval";
private static final int DEFAULT_HBCK_CHORE_INTERVAL = 60 * 60 * 1000;

private final MasterServices master;

@@ -100,14 +100,14 @@ public class HbckChecker extends ScheduledChore {
private volatile long checkingStartTimestamp = 0;
private volatile long checkingEndTimestamp = 0;

public HbckChecker(MasterServices master) {
super("HbckChecker-", master,
master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
public HbckChore(MasterServices master) {
super("HbckChore-", master,
master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL));
this.master = master;
}

@Override
protected void chore() {
protected synchronized void chore() {
running = true;
regionInfoMap.clear();
orphanRegionsOnRS.clear();
@@ -277,6 +277,6 @@ public long getCheckingStartTimestamp() {
* Used for the web UI to show when the HBCK checking report was generated.
*/
public long getCheckingEndTimestamp() {
return this.checkingStartTimestamp;
return this.checkingEndTimestamp;
}
}
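
Note: the interval key is renamed along with the class, so the chore now reads hbase.master.hbck.chore.interval (default one hour); the old hbase.master.hbck.checker.interval key is no longer consulted. A small sketch of tuning the interval, assuming the key is applied in the master's configuration before startup; setting it programmatically as below only affects this process's Configuration object, and the class name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class HbckChoreIntervalExample {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Run the HBCK chore every 30 minutes instead of the one-hour default.
    conf.setInt("hbase.master.hbck.chore.interval", 30 * 60 * 1000);
    System.out.println(conf.getInt("hbase.master.hbck.chore.interval", 60 * 60 * 1000));
  }
}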
hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -49,7 +49,6 @@
import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableState;
@@ -68,10 +67,7 @@
import org.apache.hadoop.hbase.ipc.RpcServerFactory;
import org.apache.hadoop.hbase.ipc.RpcServerInterface;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure;
import org.apache.hadoop.hbase.master.locking.LockProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
@@ -91,7 +87,6 @@
import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
import org.apache.hadoop.hbase.quotas.QuotaUtil;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.regionserver.RpcSchedulerFactory;
import org.apache.hadoop.hbase.replication.ReplicationException;
@@ -112,10 +107,8 @@
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.yetus.audience.InterfaceAudience;
@@ -264,6 +257,8 @@
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleanerChoreRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCleanerChoreResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunHbckChoreResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SecurityCapabilitiesResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SetBalancerRunningRequest;
@@ -2371,6 +2366,20 @@ public FileArchiveNotificationResponse reportFileArchival(RpcController controller

// HBCK Services

@Override
public RunHbckChoreResponse runHbckChore(RpcController c, RunHbckChoreRequest req)
throws ServiceException {
rpcPreCheck("runHbckChore");
LOG.info("{} request HBCK chore to run", master.getClientIdAuditPrefix());
HbckChore hbckChore = master.getHbckChore();
boolean ran = false;
if (!hbckChore.isRunning()) {
hbckChore.chore();
ran = true;
}
return RunHbckChoreResponse.newBuilder().setRan(ran).build();
}
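
Note: the isRunning()/chore() sequence above is not atomic, so two concurrent RPCs can both observe isRunning() == false; the synchronized modifier added to HbckChore.chore() makes the second caller block and run after the first rather than overlap it. A sketch of an alternative guard that rejects the second caller outright, under the assumption that rejection is preferable to serialization; ChoreRunGuard and tryRunChore are hypothetical, not part of this patch.

import java.util.concurrent.atomic.AtomicBoolean;

class ChoreRunGuard {
  private final AtomicBoolean running = new AtomicBoolean(false);

  // Returns false immediately if a run is already in progress,
  // instead of blocking on a synchronized chore() body.
  boolean tryRunChore(Runnable chore) {
    if (!running.compareAndSet(false, true)) {
      return false;
    }
    try {
      chore.run();
      return true;
    } finally {
      running.set(false);
    }
  }
}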

/**
* Update state of the table in meta only. This is required by hbck in some situations to clean up
* stuck assign/unassign region procedures for the table.
16 changes: 8 additions & 8 deletions hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
@@ -27,7 +27,7 @@
import="java.time.ZonedDateTime"
import="java.time.format.DateTimeFormatter"
%>
<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
<%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
@@ -38,18 +38,18 @@
<%
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
HbckChecker hbckChecker = master.getHbckChecker();
HbckChore hbckChore = master.getHbckChore();
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
Map<String, ServerName> orphanRegionsOnRS = null;
List<String> orphanRegionsOnFS = null;
long startTimestamp = 0;
long endTimestamp = 0;
if (hbckChecker != null) {
inconsistentRegions = hbckChecker.getInconsistentRegions();
orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS();
orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS();
startTimestamp = hbckChecker.getCheckingStartTimestamp();
endTimestamp = hbckChecker.getCheckingEndTimestamp();
if (hbckChore != null) {
inconsistentRegions = hbckChore.getInconsistentRegions();
orphanRegionsOnRS = hbckChore.getOrphanRegionsOnRS();
orphanRegionsOnFS = hbckChore.getOrphanRegionsOnFS();
startTimestamp = hbckChore.getCheckingStartTimestamp();
endTimestamp = hbckChore.getCheckingEndTimestamp();
}
ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp),
ZoneId.systemDefault());
hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java
@@ -36,6 +36,7 @@
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
@@ -237,6 +238,20 @@ public void testScheduleSCP() throws Exception {
waitOnPids(pids);
}

@Test
public void testRunHbckChore() throws Exception {
HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
long endTimestamp = master.getHbckChore().getCheckingEndTimestamp();
Hbck hbck = getHbck();
boolean ran = false;
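// Retry until the chore runs: the scheduled HbckChore may already be in
// progress, in which case runHbckChore() returns false.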
while (!ran) {
ran = hbck.runHbckChore();
if (ran) {
assertTrue(master.getHbckChore().getCheckingEndTimestamp() > endTimestamp);
}
}
}

public static class FailingSplitAfterMetaUpdatedMasterObserver
implements MasterCoprocessor, MasterObserver {
public volatile CountDownLatch latch;