Skip to content

Commit 5a633ad

Browse files
committed
HBASE-20377 Deal with table in enabling and disabling state when modifying serial replication peer
1 parent 826909a commit 5a633ad

File tree

4 files changed

+146
-32
lines changed

4 files changed

+146
-32
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/TableState.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,13 @@ public boolean isEnabled() {
103103
return isInStates(State.ENABLED);
104104
}
105105

106+
/**
107+
* @return True if table is {@link State#ENABLING}.
108+
*/
109+
public boolean isEnabling() {
110+
return isInStates(State.ENABLING);
111+
}
112+
106113
/**
107114
* @return True if {@link State#ENABLED} or {@link State#ENABLING}
108115
*/
@@ -117,6 +124,13 @@ public boolean isDisabled() {
117124
return isInStates(State.DISABLED);
118125
}
119126

127+
/**
128+
* @return True if table is {@link State#DISABLING}.
129+
*/
130+
public boolean isDisabling() {
131+
return isInStates(State.DISABLING);
132+
}
133+
120134
/**
121135
* @return True if {@link State#DISABLED} or {@link State#DISABLING}
122136
*/

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import java.io.IOException;
2222
import org.apache.hadoop.hbase.HBaseIOException;
23+
import org.apache.hadoop.hbase.HConstants;
2324
import org.apache.hadoop.hbase.MetaTableAccessor;
2425
import org.apache.hadoop.hbase.TableName;
2526
import org.apache.hadoop.hbase.TableNotEnabledException;
@@ -107,7 +108,7 @@ protected Flow executeFromState(final MasterProcedureEnv env, final DisableTable
107108
break;
108109
case DISABLE_TABLE_MARK_REGIONS_OFFLINE:
109110
addChildProcedure(env.getAssignmentManager().createUnassignProcedures(tableName));
110-
setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLED_TABLE_STATE);
111+
setNextState(DisableTableState.DISABLE_TABLE_ADD_REPLICATION_BARRIER);
111112
break;
112113
case DISABLE_TABLE_ADD_REPLICATION_BARRIER:
113114
if (env.getMasterServices().getTableDescriptors().get(tableName)
@@ -119,7 +120,8 @@ protected Flow executeFromState(final MasterProcedureEnv env, final DisableTable
119120
.getRegionsOfTable(tableName)) {
120121
long maxSequenceId =
121122
WALSplitter.getMaxRegionSequenceId(mfs.getFileSystem(), mfs.getRegionDir(region));
122-
mutator.mutate(MetaTableAccessor.makePutForReplicationBarrier(region, maxSequenceId,
123+
long openSeqNum = maxSequenceId > 0 ? maxSequenceId + 1 : HConstants.NO_SEQNUM;
124+
mutator.mutate(MetaTableAccessor.makePutForReplicationBarrier(region, openSeqNum,
123125
EnvironmentEdgeManager.currentTime()));
124126
}
125127
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ModifyPeerProcedure.java

Lines changed: 54 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,16 @@
1818
package org.apache.hadoop.hbase.master.replication;
1919

2020
import java.io.IOException;
21+
import java.io.InterruptedIOException;
2122
import java.util.HashMap;
2223
import java.util.Map;
2324
import org.apache.hadoop.hbase.MetaTableAccessor;
2425
import org.apache.hadoop.hbase.TableName;
2526
import org.apache.hadoop.hbase.client.Connection;
26-
import org.apache.hadoop.hbase.client.RegionInfo;
2727
import org.apache.hadoop.hbase.client.TableDescriptor;
28-
import org.apache.hadoop.hbase.master.MasterFileSystem;
28+
import org.apache.hadoop.hbase.client.TableState;
2929
import org.apache.hadoop.hbase.master.TableStateManager;
3030
import org.apache.hadoop.hbase.master.TableStateManager.TableStateNotFoundException;
31-
import org.apache.hadoop.hbase.master.assignment.RegionStates;
3231
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
3332
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
3433
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
@@ -38,7 +37,6 @@
3837
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
3938
import org.apache.hadoop.hbase.replication.ReplicationUtils;
4039
import org.apache.hadoop.hbase.util.Pair;
41-
import org.apache.hadoop.hbase.wal.WALSplitter;
4240
import org.apache.yetus.audience.InterfaceAudience;
4341
import org.slf4j.Logger;
4442
import org.slf4j.LoggerFactory;
@@ -56,6 +54,9 @@ public abstract class ModifyPeerProcedure extends AbstractPeerProcedure<PeerModi
5654

5755
protected static final int UPDATE_LAST_SEQ_ID_BATCH_SIZE = 1000;
5856

57+
// The sleep interval when waiting for a table to be enabled or disabled.
58+
protected static final int SLEEP_INTERVAL_MS = 1000;
59+
5960
protected ModifyPeerProcedure() {
6061
}
6162

@@ -126,6 +127,27 @@ protected void updateLastPushedSequenceIdForSerialPeer(MasterProcedureEnv env)
126127
throw new UnsupportedOperationException();
127128
}
128129

130+
// If the table is in enabling state, we need to wait until it is enabled and then reopen all its
131+
// regions.
132+
private boolean needReopen(TableStateManager tsm, TableName tn) throws IOException {
133+
for (;;) {
134+
try {
135+
TableState state = tsm.getTableState(tn);
136+
if (state.isEnabled()) {
137+
return true;
138+
}
139+
if (!state.isEnabling()) {
140+
return false;
141+
}
142+
Thread.sleep(SLEEP_INTERVAL_MS);
143+
} catch (TableStateNotFoundException e) {
144+
return false;
145+
} catch (InterruptedException e) {
146+
throw (IOException) new InterruptedIOException(e.getMessage()).initCause(e);
147+
}
148+
}
149+
}
150+
129151
private void reopenRegions(MasterProcedureEnv env) throws IOException {
130152
ReplicationPeerConfig peerConfig = getNewPeerConfig();
131153
ReplicationPeerConfig oldPeerConfig = getOldPeerConfig();
@@ -142,15 +164,10 @@ private void reopenRegions(MasterProcedureEnv env) throws IOException {
142164
ReplicationUtils.contains(oldPeerConfig, tn)) {
143165
continue;
144166
}
145-
try {
146-
if (!tsm.getTableState(tn).isEnabled()) {
147-
continue;
148-
}
149-
} catch (TableStateNotFoundException e) {
150-
continue;
167+
if (needReopen(tsm, tn)) {
168+
addChildProcedure(env.getAssignmentManager().createReopenProcedures(
169+
env.getAssignmentManager().getRegionStates().getRegionsOfTable(tn)));
151170
}
152-
addChildProcedure(env.getAssignmentManager().createReopenProcedures(
153-
env.getAssignmentManager().getRegionStates().getRegionsOfTable(tn)));
154171
}
155172
}
156173

@@ -183,6 +200,26 @@ protected final void setLastPushedSequenceId(MasterProcedureEnv env,
183200
}
184201
}
185202

203+
// If the table is currently disabling, then we need to wait until it is disabled. We will write
204+
// replication barrier for a disabled table. And return whether we need to update the last pushed
205+
// sequence id, if the table has been deleted already, i.e., we hit TableStateNotFoundException,
206+
// then we do not need to update last pushed sequence id for this table.
207+
private boolean needSetLastPushedSequenceId(TableStateManager tsm, TableName tn)
208+
throws IOException {
209+
for (;;) {
210+
try {
211+
if (!tsm.getTableState(tn).isDisabling()) {
212+
return true;
213+
}
214+
Thread.sleep(SLEEP_INTERVAL_MS);
215+
} catch (TableStateNotFoundException e) {
216+
return false;
217+
} catch (InterruptedException e) {
218+
throw (IOException) new InterruptedIOException(e.getMessage()).initCause(e);
219+
}
220+
}
221+
}
222+
186223
// Will put the encodedRegionName->lastPushedSeqId pair into the map passed in, if the map is
187224
// large enough we will call queueStorage.setLastSequenceIds and clear the map. So the caller
188225
// should not forget to check whether the map is empty at last, if not you should call
@@ -192,26 +229,13 @@ protected final void setLastPushedSequenceIdForTable(MasterProcedureEnv env, Tab
192229
TableStateManager tsm = env.getMasterServices().getTableStateManager();
193230
ReplicationQueueStorage queueStorage = env.getReplicationPeerManager().getQueueStorage();
194231
Connection conn = env.getMasterServices().getConnection();
195-
RegionStates regionStates = env.getAssignmentManager().getRegionStates();
196-
MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
197-
boolean isTableEnabled;
198-
try {
199-
isTableEnabled = tsm.getTableState(tableName).isEnabled();
200-
} catch (TableStateNotFoundException e) {
232+
if (!needSetLastPushedSequenceId(tsm, tableName)) {
201233
return;
202234
}
203-
if (isTableEnabled) {
204-
for (Pair<String, Long> name2Barrier : MetaTableAccessor
205-
.getTableEncodedRegionNameAndLastBarrier(conn, tableName)) {
206-
addToMap(lastSeqIds, name2Barrier.getFirst(), name2Barrier.getSecond().longValue() - 1,
207-
queueStorage);
208-
}
209-
} else {
210-
for (RegionInfo region : regionStates.getRegionsOfTable(tableName, true)) {
211-
long maxSequenceId =
212-
WALSplitter.getMaxRegionSequenceId(mfs.getFileSystem(), mfs.getRegionDir(region));
213-
addToMap(lastSeqIds, region.getEncodedName(), maxSequenceId, queueStorage);
214-
}
235+
for (Pair<String, Long> name2Barrier : MetaTableAccessor
236+
.getTableEncodedRegionNameAndLastBarrier(conn, tableName)) {
237+
addToMap(lastSeqIds, name2Barrier.getFirst(), name2Barrier.getSecond().longValue() - 1,
238+
queueStorage);
215239
}
216240
}
217241

hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestAddToSerialReplicationPeer.java

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.replication;
1919

20+
import static org.junit.Assert.assertTrue;
21+
2022
import java.io.IOException;
2123
import java.util.Collections;
2224
import org.apache.hadoop.fs.Path;
@@ -26,6 +28,8 @@
2628
import org.apache.hadoop.hbase.client.Put;
2729
import org.apache.hadoop.hbase.client.RegionInfo;
2830
import org.apache.hadoop.hbase.client.Table;
31+
import org.apache.hadoop.hbase.client.TableState;
32+
import org.apache.hadoop.hbase.master.TableStateManager;
2933
import org.apache.hadoop.hbase.regionserver.HRegionServer;
3034
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
3135
import org.apache.hadoop.hbase.replication.regionserver.Replication;
@@ -192,4 +196,74 @@ public void testDisabledTable() throws Exception {
192196
waitUntilReplicationDone(100);
193197
checkOrder(100);
194198
}
199+
200+
@Test
201+
public void testDisablingTable() throws Exception {
202+
TableName tableName = createTable();
203+
try (Table table = UTIL.getConnection().getTable(tableName)) {
204+
for (int i = 0; i < 100; i++) {
205+
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
206+
}
207+
}
208+
UTIL.getAdmin().disableTable(tableName);
209+
rollAllWALs();
210+
TableStateManager tsm = UTIL.getMiniHBaseCluster().getMaster().getTableStateManager();
211+
tsm.setTableState(tableName, TableState.State.DISABLING);
212+
Thread t = new Thread(() -> {
213+
try {
214+
addPeer(true);
215+
} catch (IOException e) {
216+
throw new RuntimeException(e);
217+
}
218+
});
219+
t.start();
220+
Thread.sleep(5000);
221+
// we will wait on the disabling table so the thread should still be alive.
222+
assertTrue(t.isAlive());
223+
tsm.setTableState(tableName, TableState.State.DISABLED);
224+
t.join();
225+
UTIL.getAdmin().enableTable(tableName);
226+
try (Table table = UTIL.getConnection().getTable(tableName)) {
227+
for (int i = 0; i < 100; i++) {
228+
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
229+
}
230+
}
231+
waitUntilReplicationDone(100);
232+
checkOrder(100);
233+
}
234+
235+
@Test
236+
public void testEnablingTable() throws Exception {
237+
TableName tableName = createTable();
238+
try (Table table = UTIL.getConnection().getTable(tableName)) {
239+
for (int i = 0; i < 100; i++) {
240+
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
241+
}
242+
}
243+
RegionInfo region = UTIL.getAdmin().getRegions(tableName).get(0);
244+
HRegionServer rs = UTIL.getOtherRegionServer(UTIL.getRSForFirstRegionInTable(tableName));
245+
moveRegionAndArchiveOldWals(region, rs);
246+
TableStateManager tsm = UTIL.getMiniHBaseCluster().getMaster().getTableStateManager();
247+
tsm.setTableState(tableName, TableState.State.ENABLING);
248+
Thread t = new Thread(() -> {
249+
try {
250+
addPeer(true);
251+
} catch (IOException e) {
252+
throw new RuntimeException(e);
253+
}
254+
});
255+
t.start();
256+
Thread.sleep(5000);
257+
// we will wait on the enabling table so the thread should still be alive.
258+
assertTrue(t.isAlive());
259+
tsm.setTableState(tableName, TableState.State.ENABLED);
260+
t.join();
261+
try (Table table = UTIL.getConnection().getTable(tableName)) {
262+
for (int i = 0; i < 100; i++) {
263+
table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i)));
264+
}
265+
}
266+
waitUntilReplicationDone(100);
267+
checkOrder(100);
268+
}
195269
}

0 commit comments

Comments
 (0)