Skip to content

Commit ccb9ea1

Browse files
committed
HBASE-28199 Phase I: Suspend TRSP and SCP when updating meta
1 parent 23c4156 commit ccb9ea1

15 files changed

+727
-150
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java

Lines changed: 81 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.util.List;
2828
import java.util.Map;
2929
import java.util.Set;
30+
import java.util.concurrent.CompletableFuture;
3031
import java.util.concurrent.Future;
3132
import java.util.concurrent.TimeUnit;
3233
import java.util.concurrent.atomic.AtomicBoolean;
@@ -82,6 +83,7 @@
8283
import org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer;
8384
import org.apache.hadoop.hbase.util.Bytes;
8485
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
86+
import org.apache.hadoop.hbase.util.FutureUtils;
8587
import org.apache.hadoop.hbase.util.Pair;
8688
import org.apache.hadoop.hbase.util.Threads;
8789
import org.apache.hadoop.hbase.util.VersionInfo;
@@ -1989,71 +1991,78 @@ public RegionInfo getRegionInfo(final String encodedRegionName) {
19891991
// Should only be called in TransitRegionStateProcedure(and related procedures), as the locking
19901992
// and pre-assumptions are very tricky.
19911993
// ============================================================================================
1992-
private void transitStateAndUpdate(RegionStateNode regionNode, RegionState.State newState,
1993-
RegionState.State... expectedStates) throws IOException {
1994+
private CompletableFuture<Void> transitStateAndUpdate(RegionStateNode regionNode,
1995+
RegionState.State newState, RegionState.State... expectedStates) {
19941996
RegionState.State state = regionNode.getState();
1995-
regionNode.transitionState(newState, expectedStates);
1996-
boolean succ = false;
19971997
try {
1998-
regionStateStore.updateRegionLocation(regionNode);
1999-
succ = true;
2000-
} finally {
2001-
if (!succ) {
1998+
regionNode.transitionState(newState, expectedStates);
1999+
} catch (UnexpectedStateException e) {
2000+
return FutureUtils.failedFuture(e);
2001+
}
2002+
CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);
2003+
FutureUtils.addListener(future, (r, e) -> {
2004+
if (e != null) {
20022005
// revert
20032006
regionNode.setState(state);
20042007
}
2005-
}
2008+
});
2009+
return future;
20062010
}
20072011

20082012
// should be called within the synchronized block of RegionStateNode
2009-
void regionOpening(RegionStateNode regionNode) throws IOException {
2013+
CompletableFuture<Void> regionOpening(RegionStateNode regionNode) {
20102014
// As in SCP, for performance reason, there is no TRSP attached with this region, we will not
20112015
// update the region state, which means that the region could be in any state when we want to
20122016
// assign it after a RS crash. So here we do not pass the expectedStates parameter.
2013-
transitStateAndUpdate(regionNode, State.OPENING);
2014-
regionStates.addRegionToServer(regionNode);
2015-
// update the operation count metrics
2016-
metrics.incrementOperationCounter();
2017+
return transitStateAndUpdate(regionNode, State.OPENING).thenAccept(r -> {
2018+
regionStates.addRegionToServer(regionNode);
2019+
// update the operation count metrics
2020+
metrics.incrementOperationCounter();
2021+
});
20172022
}
20182023

20192024
// should be called under the RegionStateNode lock
20202025
// The parameter 'giveUp' means whether we will try to open the region again, if it is true, then
20212026
// we will persist the FAILED_OPEN state into hbase:meta.
2022-
void regionFailedOpen(RegionStateNode regionNode, boolean giveUp) throws IOException {
2027+
CompletableFuture<Void> regionFailedOpen(RegionStateNode regionNode, boolean giveUp) {
20232028
RegionState.State state = regionNode.getState();
20242029
ServerName regionLocation = regionNode.getRegionLocation();
2025-
if (giveUp) {
2026-
regionNode.setState(State.FAILED_OPEN);
2027-
regionNode.setRegionLocation(null);
2028-
boolean succ = false;
2029-
try {
2030-
regionStateStore.updateRegionLocation(regionNode);
2031-
succ = true;
2032-
} finally {
2033-
if (!succ) {
2034-
// revert
2035-
regionNode.setState(state);
2036-
regionNode.setRegionLocation(regionLocation);
2037-
}
2030+
if (!giveUp) {
2031+
if (regionLocation != null) {
2032+
regionStates.removeRegionFromServer(regionLocation, regionNode);
20382033
}
2034+
return CompletableFuture.completedFuture(null);
20392035
}
2040-
if (regionLocation != null) {
2041-
regionStates.removeRegionFromServer(regionLocation, regionNode);
2042-
}
2036+
regionNode.setState(State.FAILED_OPEN);
2037+
regionNode.setRegionLocation(null);
2038+
CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);
2039+
FutureUtils.addListener(future, (r, e) -> {
2040+
if (e == null) {
2041+
if (regionLocation != null) {
2042+
regionStates.removeRegionFromServer(regionLocation, regionNode);
2043+
}
2044+
} else {
2045+
// revert
2046+
regionNode.setState(state);
2047+
regionNode.setRegionLocation(regionLocation);
2048+
}
2049+
});
2050+
return future;
20432051
}
20442052

20452053
// should be called under the RegionStateNode lock
2046-
void regionClosing(RegionStateNode regionNode) throws IOException {
2047-
transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING);
2048-
2049-
RegionInfo hri = regionNode.getRegionInfo();
2050-
// Set meta has not initialized early. so people trying to create/edit tables will wait
2051-
if (isMetaRegion(hri)) {
2052-
setMetaAssigned(hri, false);
2053-
}
2054-
regionStates.addRegionToServer(regionNode);
2055-
// update the operation count metrics
2056-
metrics.incrementOperationCounter();
2054+
CompletableFuture<Void> regionClosing(RegionStateNode regionNode) {
2055+
return transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING)
2056+
.thenAccept(r -> {
2057+
RegionInfo hri = regionNode.getRegionInfo();
2058+
// Set meta has not initialized early. so people trying to create/edit tables will wait
2059+
if (isMetaRegion(hri)) {
2060+
setMetaAssigned(hri, false);
2061+
}
2062+
regionStates.addRegionToServer(regionNode);
2063+
// update the operation count metrics
2064+
metrics.incrementOperationCounter();
2065+
});
20572066
}
20582067

20592068
// for open and close, they will first be persist to the procedure store in
@@ -2062,15 +2071,17 @@ void regionClosing(RegionStateNode regionNode) throws IOException {
20622071
// RegionRemoteProcedureBase is woken up, we will persist the RegionStateNode to hbase:meta.
20632072

20642073
// should be called under the RegionStateNode lock
2065-
void regionOpenedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException {
2074+
void regionOpenedWithoutPersistingToMeta(RegionStateNode regionNode)
2075+
throws UnexpectedStateException {
20662076
regionNode.transitionState(State.OPEN, STATES_EXPECTED_ON_OPEN);
20672077
RegionInfo regionInfo = regionNode.getRegionInfo();
20682078
regionStates.addRegionToServer(regionNode);
20692079
regionStates.removeFromFailedOpen(regionInfo);
20702080
}
20712081

20722082
// should be called under the RegionStateNode lock
2073-
void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException {
2083+
void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode)
2084+
throws UnexpectedStateException {
20742085
ServerName regionLocation = regionNode.getRegionLocation();
20752086
regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED);
20762087
regionNode.setRegionLocation(null);
@@ -2080,40 +2091,41 @@ void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOEx
20802091
}
20812092
}
20822093

2094+
// should be called under the RegionStateNode lock
2095+
CompletableFuture<Void> persistToMeta(RegionStateNode regionNode) {
2096+
return regionStateStore.updateRegionLocation(regionNode).thenAccept(r -> {
2097+
RegionInfo regionInfo = regionNode.getRegionInfo();
2098+
if (isMetaRegion(regionInfo) && regionNode.getState() == State.OPEN) {
2099+
// Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
2100+
// can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager
2101+
// which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state
2102+
// on table that contains state.
2103+
setMetaAssigned(regionInfo, true);
2104+
}
2105+
});
2106+
}
2107+
20832108
// should be called under the RegionStateNode lock
20842109
// for SCP
2085-
public void regionClosedAbnormally(RegionStateNode regionNode) throws IOException {
2110+
public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode) {
20862111
RegionState.State state = regionNode.getState();
20872112
ServerName regionLocation = regionNode.getRegionLocation();
2088-
regionNode.transitionState(State.ABNORMALLY_CLOSED);
2113+
regionNode.setState(State.ABNORMALLY_CLOSED);
20892114
regionNode.setRegionLocation(null);
2090-
boolean succ = false;
2091-
try {
2092-
regionStateStore.updateRegionLocation(regionNode);
2093-
succ = true;
2094-
} finally {
2095-
if (!succ) {
2115+
CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);
2116+
FutureUtils.addListener(future, (r, e) -> {
2117+
if (e == null) {
2118+
if (regionLocation != null) {
2119+
regionNode.setLastHost(regionLocation);
2120+
regionStates.removeRegionFromServer(regionLocation, regionNode);
2121+
}
2122+
} else {
20962123
// revert
20972124
regionNode.setState(state);
20982125
regionNode.setRegionLocation(regionLocation);
20992126
}
2100-
}
2101-
if (regionLocation != null) {
2102-
regionNode.setLastHost(regionLocation);
2103-
regionStates.removeRegionFromServer(regionLocation, regionNode);
2104-
}
2105-
}
2106-
2107-
void persistToMeta(RegionStateNode regionNode) throws IOException {
2108-
regionStateStore.updateRegionLocation(regionNode);
2109-
RegionInfo regionInfo = regionNode.getRegionInfo();
2110-
if (isMetaRegion(regionInfo) && regionNode.getState() == State.OPEN) {
2111-
// Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
2112-
// can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager
2113-
// which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state
2114-
// on table that contains state.
2115-
setMetaAssigned(regionInfo, true);
2116-
}
2127+
});
2128+
return future;
21172129
}
21182130

21192131
// ============================================================================================

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.io.IOException;
2121
import java.util.Optional;
22+
import java.util.concurrent.CompletableFuture;
2223
import org.apache.hadoop.hbase.HConstants;
2324
import org.apache.hadoop.hbase.ServerName;
2425
import org.apache.hadoop.hbase.TableName;
@@ -29,6 +30,7 @@
2930
import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
3031
import org.apache.hadoop.hbase.procedure2.Procedure;
3132
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
33+
import org.apache.hadoop.hbase.procedure2.ProcedureFutureUtil;
3234
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
3335
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
3436
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
@@ -73,6 +75,8 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur
7375

7476
private RetryCounter retryCounter;
7577

78+
private CompletableFuture<Void> future;
79+
7680
protected RegionRemoteProcedureBase() {
7781
}
7882

@@ -268,11 +272,21 @@ private void unattach(MasterProcedureEnv env) {
268272
getParent(env).unattachRemoteProc(this);
269273
}
270274

275+
private CompletableFuture<Void> getFuture() {
276+
return future;
277+
}
278+
279+
private void setFuture(CompletableFuture<Void> f) {
280+
future = f;
281+
}
282+
271283
@Override
272284
protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
273285
throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
274286
RegionStateNode regionNode = getRegionNode(env);
275-
regionNode.lock();
287+
if (future == null) {
288+
regionNode.lock(this);
289+
}
276290
try {
277291
switch (state) {
278292
case REGION_REMOTE_PROCEDURE_DISPATCH: {
@@ -294,16 +308,29 @@ protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
294308
throw new ProcedureSuspendedException();
295309
}
296310
case REGION_REMOTE_PROCEDURE_REPORT_SUCCEED:
297-
env.getAssignmentManager().persistToMeta(regionNode);
298-
unattach(env);
311+
if (
312+
ProcedureFutureUtil.checkFuture(this, this::getFuture, this::setFuture,
313+
() -> unattach(env))
314+
) {
315+
return null;
316+
}
317+
ProcedureFutureUtil.suspendIfNecessary(this, this::setFuture,
318+
env.getAssignmentManager().persistToMeta(regionNode), env, () -> unattach(env));
299319
return null;
300320
case REGION_REMOTE_PROCEDURE_DISPATCH_FAIL:
301321
// the remote call is failed so we do not need to change the region state, just return.
302322
unattach(env);
303323
return null;
304324
case REGION_REMOTE_PROCEDURE_SERVER_CRASH:
305-
env.getAssignmentManager().regionClosedAbnormally(regionNode);
306-
unattach(env);
325+
if (
326+
ProcedureFutureUtil.checkFuture(this, this::getFuture, this::setFuture,
327+
() -> unattach(env))
328+
) {
329+
return null;
330+
}
331+
ProcedureFutureUtil.suspendIfNecessary(this, this::setFuture,
332+
env.getAssignmentManager().regionClosedAbnormally(regionNode), env,
333+
() -> unattach(env));
307334
return null;
308335
default:
309336
throw new IllegalStateException("Unknown state: " + state);
@@ -314,12 +341,11 @@ protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
314341
}
315342
long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
316343
LOG.warn("Failed updating meta, suspend {}secs {}; {};", backoff / 1000, this, regionNode, e);
317-
setTimeout(Math.toIntExact(backoff));
318-
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
319-
skipPersistence();
320-
throw new ProcedureSuspendedException();
344+
throw suspend(Math.toIntExact(backoff), true);
321345
} finally {
322-
regionNode.unlock();
346+
if (future == null) {
347+
regionNode.unlock(this);
348+
}
323349
}
324350
}
325351

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
import java.util.Arrays;
2121
import java.util.concurrent.ConcurrentMap;
22-
import java.util.concurrent.locks.Lock;
23-
import java.util.concurrent.locks.ReentrantLock;
2422
import org.apache.hadoop.hbase.HConstants;
2523
import org.apache.hadoop.hbase.ServerName;
2624
import org.apache.hadoop.hbase.TableName;
@@ -30,6 +28,7 @@
3028
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
3129
import org.apache.hadoop.hbase.master.RegionState;
3230
import org.apache.hadoop.hbase.master.RegionState.State;
31+
import org.apache.hadoop.hbase.procedure2.Procedure;
3332
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
3433
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
3534
import org.apache.yetus.audience.InterfaceAudience;
@@ -75,7 +74,7 @@ public AssignmentProcedureEvent(final RegionInfo regionInfo) {
7574
}
7675
}
7776

78-
final Lock lock = new ReentrantLock();
77+
private final RegionStateNodeLock lock;
7978
private final RegionInfo regionInfo;
8079
private final ProcedureEvent<?> event;
8180
private final ConcurrentMap<RegionInfo, RegionStateNode> ritMap;
@@ -106,6 +105,7 @@ public AssignmentProcedureEvent(final RegionInfo regionInfo) {
106105
this.regionInfo = regionInfo;
107106
this.event = new AssignmentProcedureEvent(regionInfo);
108107
this.ritMap = ritMap;
108+
this.lock = new RegionStateNodeLock(regionInfo);
109109
}
110110

111111
/**
@@ -319,6 +319,9 @@ public void checkOnline() throws DoNotRetryRegionException {
319319
}
320320
}
321321

322+
// The below 3 methods are for normal locking operation, where the thread owner is the current
323+
// thread. Typically you just need to use these 3 methods, and use try..finally to release the
324+
// lock in the finally block
322325
public void lock() {
323326
lock.lock();
324327
}
@@ -330,4 +333,28 @@ public boolean tryLock() {
330333
public void unlock() {
331334
lock.unlock();
332335
}
336+
337+
// The below 3 methods are for locking region state node when executing procedures, where we may
338+
// do some time consuming work under the lock, for example, updating meta. As we may suspend the
339+
// procedure while holding the lock and then release it when the procedure is back, in another
340+
// thread, so we need to use the procedure itself as owner, instead of the current thread. You can
341+
// see the usage in TRSP, SCP, and RegionRemoteProcedureBase for more details.
342+
// Notice that, this does not mean you must use these 3 methods when locking region state node in
343+
// procedure, you are free to use the above 3 methods if you do not want to hold the lock when
344+
// suspending the procedure.
345+
public void lock(Procedure<?> proc) {
346+
lock.lock(proc);
347+
}
348+
349+
public boolean tryLock(Procedure<?> proc) {
350+
return lock.tryLock(proc);
351+
}
352+
353+
public void unlock(Procedure<?> proc) {
354+
lock.unlock(proc);
355+
}
356+
357+
boolean isLocked() {
358+
return lock.isLocked();
359+
}
333360
}

0 commit comments

Comments
 (0)