Skip to content

Commit 0321f56

Browse files
authored
HBASE-23652 Move the unsupported procedure type check before migrating to RegionProcedureStore (#1018)
Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
1 parent 19d3bed commit 0321f56

File tree

5 files changed

+103
-56
lines changed

5 files changed

+103
-56
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,11 @@
9898
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
9999
import org.apache.hadoop.hbase.log.HBaseMarkers;
100100
import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
101-
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
102101
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
103102
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
104-
import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure;
105103
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
106104
import org.apache.hadoop.hbase.master.assignment.RegionStates;
107105
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
108-
import org.apache.hadoop.hbase.master.assignment.UnassignProcedure;
109106
import org.apache.hadoop.hbase.master.balancer.BalancerChore;
110107
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
111108
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
@@ -135,7 +132,6 @@
135132
import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
136133
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
137134
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
138-
import org.apache.hadoop.hbase.master.procedure.RecoverMetaProcedure;
139135
import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
140136
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
141137
import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
@@ -225,7 +221,6 @@
225221
import org.slf4j.LoggerFactory;
226222

227223
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
228-
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet;
229224
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
230225
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
231226

@@ -833,45 +828,6 @@ protected void initializeZKBasedSystemTrackers()
833828
this.mpmHost.initialize(this, this.metricsMaster);
834829
}
835830

836-
private static final ImmutableSet<Class<? extends Procedure>> UNSUPPORTED_PROCEDURES =
837-
ImmutableSet.of(RecoverMetaProcedure.class, AssignProcedure.class, UnassignProcedure.class,
838-
MoveRegionProcedure.class);
839-
840-
/**
841-
* In HBASE-20811, we have introduced a new TRSP to assign/unassign/move regions, and it is
842-
* incompatible with the old AssignProcedure/UnassignProcedure/MoveRegionProcedure. So we need to
843-
* make sure that there are none these procedures when upgrading. If there are, the master will
844-
* quit, you need to go back to the old version to finish these procedures first before upgrading.
845-
*/
846-
private void checkUnsupportedProcedure(
847-
Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType)
848-
throws HBaseIOException {
849-
// Confirm that we do not have unfinished assign/unassign related procedures. It is not easy to
850-
// support both the old assign/unassign procedures and the new TransitRegionStateProcedure as
851-
// there will be conflict in the code for AM. We should finish all these procedures before
852-
// upgrading.
853-
for (Class<? extends Procedure> clazz : UNSUPPORTED_PROCEDURES) {
854-
List<Procedure<MasterProcedureEnv>> procs = procsByType.get(clazz);
855-
if (procs != null) {
856-
LOG.error(
857-
"Unsupported procedure type {} found, please rollback your master to the old" +
858-
" version to finish them, and then try to upgrade again. The full procedure list: {}",
859-
clazz, procs);
860-
throw new HBaseIOException("Unsupported procedure type " + clazz + " found");
861-
}
862-
}
863-
// A special check for SCP, as we do not support RecoverMetaProcedure any more so we need to
864-
// make sure that no one will try to schedule it but SCP does have a state which will schedule
865-
// it.
866-
if (procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
867-
.map(p -> (ServerCrashProcedure) p).anyMatch(ServerCrashProcedure::isInRecoverMetaState)) {
868-
LOG.error("At least one ServerCrashProcedure is going to schedule a RecoverMetaProcedure," +
869-
" which is not supported any more. Please rollback your master to the old version to" +
870-
" finish them, and then try to upgrade again.");
871-
throw new HBaseIOException("Unsupported procedure state found for ServerCrashProcedure");
872-
}
873-
}
874-
875831
// Will be overriden in test to inject customized AssignmentManager
876832
@VisibleForTesting
877833
protected AssignmentManager createAssignmentManager(MasterServices master) {
@@ -965,13 +921,10 @@ private void finishActiveMasterInitialization(MonitoredTask status) throws IOExc
965921
this.splitWALManager = new SplitWALManager(this);
966922
}
967923
createProcedureExecutor();
968-
@SuppressWarnings("rawtypes")
969-
Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType =
924+
Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType =
970925
procedureExecutor.getActiveProceduresNoCopy().stream()
971926
.collect(Collectors.groupingBy(p -> p.getClass()));
972927

973-
checkUnsupportedProcedure(procsByType);
974-
975928
// Create Assignment Manager
976929
this.assignmentManager = createAssignmentManager(this);
977930
this.assignmentManager.start();

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
3030
import org.apache.yetus.audience.InterfaceAudience;
3131

32+
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
33+
3234
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
3335
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.AssignRegionStateData;
3436
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
@@ -140,4 +142,10 @@ public void toStringClassDetails(StringBuilder sb) {
140142
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
141143
return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics();
142144
}
145+
146+
@VisibleForTesting
147+
@Override
148+
public void setProcId(long procId) {
149+
super.setProcId(procId);
150+
}
143151
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ public RegionInfo getRegionInfo() {
6969
return regionInfo;
7070
}
7171

72-
protected void setRegionInfo(final RegionInfo regionInfo) {
72+
@VisibleForTesting
73+
public void setRegionInfo(final RegionInfo regionInfo) {
7374
this.regionInfo = regionInfo;
7475
}
7576

hbase-server/src/main/java/org/apache/hadoop/hbase/procedure2/store/region/RegionProcedureStore.java

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525
import java.io.UncheckedIOException;
2626
import java.util.ArrayList;
2727
import java.util.Arrays;
28+
import java.util.Collections;
29+
import java.util.HashMap;
2830
import java.util.List;
31+
import java.util.Map;
2932
import org.apache.commons.lang3.mutable.MutableLong;
3033
import org.apache.hadoop.conf.Configuration;
3134
import org.apache.hadoop.fs.FileStatus;
@@ -45,6 +48,11 @@
4548
import org.apache.hadoop.hbase.client.TableDescriptor;
4649
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
4750
import org.apache.hadoop.hbase.log.HBaseMarkers;
51+
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
52+
import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure;
53+
import org.apache.hadoop.hbase.master.assignment.UnassignProcedure;
54+
import org.apache.hadoop.hbase.master.procedure.RecoverMetaProcedure;
55+
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
4856
import org.apache.hadoop.hbase.procedure2.Procedure;
4957
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
5058
import org.apache.hadoop.hbase.procedure2.store.LeaseRecovery;
@@ -65,6 +73,7 @@
6573
import org.slf4j.LoggerFactory;
6674

6775
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
76+
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet;
6877
import org.apache.hbase.thirdparty.com.google.common.math.IntMath;
6978

7079
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
@@ -298,6 +307,46 @@ private HRegion open(Configuration conf, FileSystem fs, Path rootDir) throws IOE
298307
null);
299308
}
300309

310+
@SuppressWarnings("deprecation")
311+
private static final ImmutableSet<Class<?>> UNSUPPORTED_PROCEDURES =
312+
ImmutableSet.of(RecoverMetaProcedure.class, AssignProcedure.class, UnassignProcedure.class,
313+
MoveRegionProcedure.class);
314+
315+
/**
316+
* In HBASE-20811, we have introduced a new TRSP to assign/unassign/move regions, and it is
317+
* incompatible with the old AssignProcedure/UnassignProcedure/MoveRegionProcedure. So we need to
318+
* make sure that there are none these procedures when upgrading. If there are, the master will
319+
* quit, you need to go back to the old version to finish these procedures first before upgrading.
320+
*/
321+
private void checkUnsupportedProcedure(Map<Class<?>, List<Procedure<?>>> procsByType)
322+
throws HBaseIOException {
323+
// Confirm that we do not have unfinished assign/unassign related procedures. It is not easy to
324+
// support both the old assign/unassign procedures and the new TransitRegionStateProcedure as
325+
// there will be conflict in the code for AM. We should finish all these procedures before
326+
// upgrading.
327+
for (Class<?> clazz : UNSUPPORTED_PROCEDURES) {
328+
List<Procedure<?>> procs = procsByType.get(clazz);
329+
if (procs != null) {
330+
LOG.error("Unsupported procedure type {} found, please rollback your master to the old" +
331+
" version to finish them, and then try to upgrade again." +
332+
" See https://hbase.apache.org/book.html#upgrade2.2 for more details." +
333+
" The full procedure list: {}", clazz, procs);
334+
throw new HBaseIOException("Unsupported procedure type " + clazz + " found");
335+
}
336+
}
337+
// A special check for SCP, as we do not support RecoverMetaProcedure any more so we need to
338+
// make sure that no one will try to schedule it but SCP does have a state which will schedule
339+
// it.
340+
if (procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
341+
.map(p -> (ServerCrashProcedure) p).anyMatch(ServerCrashProcedure::isInRecoverMetaState)) {
342+
LOG.error("At least one ServerCrashProcedure is going to schedule a RecoverMetaProcedure," +
343+
" which is not supported any more. Please rollback your master to the old version to" +
344+
" finish them, and then try to upgrade again." +
345+
" See https://hbase.apache.org/book.html#upgrade2.2 for more details.");
346+
throw new HBaseIOException("Unsupported procedure state found for ServerCrashProcedure");
347+
}
348+
}
349+
301350
@SuppressWarnings("deprecation")
302351
private void tryMigrate(FileSystem fs) throws IOException {
303352
Configuration conf = server.getConfiguration();
@@ -311,7 +360,8 @@ private void tryMigrate(FileSystem fs) throws IOException {
311360
store.start(numThreads);
312361
store.recoverLease();
313362
MutableLong maxProcIdSet = new MutableLong(-1);
314-
MutableLong maxProcIdFromProcs = new MutableLong(-1);
363+
List<Procedure<?>> procs = new ArrayList<>();
364+
Map<Class<?>, List<Procedure<?>>> activeProcsByType = new HashMap<>();
315365
store.load(new ProcedureLoader() {
316366

317367
@Override
@@ -321,16 +371,13 @@ public void setMaxProcId(long maxProcId) {
321371

322372
@Override
323373
public void load(ProcedureIterator procIter) throws IOException {
324-
long procCount = 0;
325374
while (procIter.hasNext()) {
326375
Procedure<?> proc = procIter.next();
327-
update(proc);
328-
procCount++;
329-
if (proc.getProcId() > maxProcIdFromProcs.longValue()) {
330-
maxProcIdFromProcs.setValue(proc.getProcId());
376+
procs.add(proc);
377+
if (!proc.isFinished()) {
378+
activeProcsByType.computeIfAbsent(proc.getClass(), k -> new ArrayList<>()).add(proc);
331379
}
332380
}
333-
LOG.info("Migrated {} procedures", procCount);
334381
}
335382

336383
@Override
@@ -347,6 +394,22 @@ public void handleCorrupted(ProcedureIterator procIter) throws IOException {
347394
}
348395
}
349396
});
397+
398+
// check whether there are unsupported procedures, this could happen when we are migrating from
399+
// 2.1-. We used to do this in HMaster, after loading all the procedures from procedure store,
400+
// but here we have to do it before migrating, otherwise, if we find some unsupported
401+
// procedures, the users can not go back to 2.1 to finish them any more, as all the data are now
402+
// in the new region based procedure store, which is not supported in 2.1-.
403+
checkUnsupportedProcedure(activeProcsByType);
404+
405+
MutableLong maxProcIdFromProcs = new MutableLong(-1);
406+
for (Procedure<?> proc : procs) {
407+
update(proc);
408+
if (proc.getProcId() > maxProcIdFromProcs.longValue()) {
409+
maxProcIdFromProcs.setValue(proc.getProcId());
410+
}
411+
}
412+
LOG.info("Migrated {} existing procedures from the old storage format.", procs.size());
350413
LOG.info("The WALProcedureStore max pid is {}, and the max pid of all loaded procedures is {}",
351414
maxProcIdSet.longValue(), maxProcIdFromProcs.longValue());
352415
// Theoretically, the maxProcIdSet should be greater than or equal to maxProcIdFromProcs, but

hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStoreMigration.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.procedure2.store.region;
1919

20+
import static org.hamcrest.CoreMatchers.startsWith;
21+
import static org.hamcrest.MatcherAssert.assertThat;
2022
import static org.junit.Assert.assertEquals;
2123
import static org.junit.Assert.assertFalse;
2224
import static org.junit.Assert.fail;
@@ -32,6 +34,10 @@
3234
import org.apache.hadoop.fs.Path;
3335
import org.apache.hadoop.hbase.HBaseClassTestRule;
3436
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
37+
import org.apache.hadoop.hbase.HBaseIOException;
38+
import org.apache.hadoop.hbase.TableName;
39+
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
40+
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
3541
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.LoadCounter;
3642
import org.apache.hadoop.hbase.procedure2.store.LeaseRecovery;
3743
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureIterator;
@@ -140,4 +146,20 @@ public void handleCorrupted(ProcedureIterator procIter) throws IOException {
140146
// make sure the old proc wal directory has been deleted.
141147
assertFalse(fs.exists(oldProcWALDir));
142148
}
149+
150+
@Test
151+
public void testMigrateWithUnsupportedProcedures() throws IOException {
152+
AssignProcedure assignProc = new AssignProcedure();
153+
assignProc.setProcId(1L);
154+
assignProc.setRegionInfo(RegionInfoBuilder.newBuilder(TableName.valueOf("table")).build());
155+
walStore.insert(assignProc, null);
156+
walStore.stop(true);
157+
158+
try {
159+
store = RegionProcedureStoreTestHelper.createStore(htu.getConfiguration(), new LoadCounter());
160+
fail("Should fail since AssignProcedure is not supported");
161+
} catch (HBaseIOException e) {
162+
assertThat(e.getMessage(), startsWith("Unsupported"));
163+
}
164+
}
143165
}

0 commit comments

Comments
 (0)