Skip to content

Commit a33389e

Browse files
author
Hernan Gelaf-Romer
committed
SnapshotProcedure and EnableTableProcedure can cause a deadlock
1 parent a71288f commit a33389e

File tree

2 files changed

+52
-4
lines changed

2 files changed

+52
-4
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/SnapshotProcedure.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.apache.hadoop.conf.Configuration;
2727
import org.apache.hadoop.fs.FileSystem;
2828
import org.apache.hadoop.fs.Path;
29+
import org.apache.hadoop.hbase.HBaseIOException;
2930
import org.apache.hadoop.hbase.TableName;
3031
import org.apache.hadoop.hbase.client.RegionInfo;
3132
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
@@ -143,13 +144,18 @@ protected Flow executeFromState(MasterProcedureEnv env, SnapshotState state)
143144
setNextState(SnapshotState.SNAPSHOT_WRITE_SNAPSHOT_INFO);
144145
return Flow.HAS_MORE_STATE;
145146
case SNAPSHOT_WRITE_SNAPSHOT_INFO:
146-
SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFS);
147147
TableState tableState =
148148
env.getMasterServices().getTableStateManager().getTableState(snapshotTable);
149149
if (tableState.isEnabled()) {
150+
SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFS);
150151
setNextState(SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS);
151152
} else if (tableState.isDisabled()) {
153+
SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFS);
152154
setNextState(SnapshotState.SNAPSHOT_SNAPSHOT_CLOSED_REGIONS);
155+
} else {
156+
setFailureState(new HBaseIOException(
157+
"Table " + snapshotTable + " is not in a valid state for snapshot: " + tableState));
158+
return Flow.NO_MORE_STATE;
153159
}
154160
return Flow.HAS_MORE_STATE;
155161
case SNAPSHOT_SNAPSHOT_ONLINE_REGIONS:
@@ -195,9 +201,7 @@ protected Flow executeFromState(MasterProcedureEnv env, SnapshotState state)
195201
} catch (ProcedureSuspendedException e) {
196202
throw e;
197203
} catch (Exception e) {
198-
setFailure("master-snapshot", e);
199-
LOG.warn("unexpected exception while execute {}. Mark procedure Failed.", this, e);
200-
status.abort("Abort Snapshot " + snapshot.getName() + " on Table " + snapshotTable);
204+
setFailureState(e);
201205
return Flow.NO_MORE_STATE;
202206
}
203207
}
@@ -236,6 +240,12 @@ protected SnapshotState getInitialState() {
236240
return SnapshotState.SNAPSHOT_PREPARE;
237241
}
238242

243+
private void setFailureState(Exception e) {
244+
setFailure("master-snapshot", e);
245+
LOG.warn("unexpected exception while execute {}. Mark procedure Failed.", this, e);
246+
status.abort("Abort Snapshot " + snapshot.getName() + " on Table " + snapshotTable);
247+
}
248+
239249
private void prepareSnapshot(MasterProcedureEnv env)
240250
throws ProcedureSuspendedException, IOException {
241251
if (isAnySplitOrMergeProcedureRunning(env)) {

hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSnapshotProcedureConcurrently.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@
2727
import java.util.List;
2828
import java.util.stream.Collectors;
2929
import org.apache.hadoop.hbase.HBaseClassTestRule;
30+
import org.apache.hadoop.hbase.TableName;
3031
import org.apache.hadoop.hbase.client.SnapshotDescription;
3132
import org.apache.hadoop.hbase.client.SnapshotType;
33+
import org.apache.hadoop.hbase.client.TableState;
3234
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
3335
import org.apache.hadoop.hbase.procedure2.Procedure;
3436
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
@@ -136,4 +138,40 @@ public void run() {
136138
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
137139
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotOnSameTableProto, TABLE_NAME, CF);
138140
}
141+
142+
@Test
143+
public void testItFailsIfTableIsNotDisabledOrEnabled() throws Exception {
144+
ProcedureExecutor<MasterProcedureEnv> executor = master.getMasterProcedureExecutor();
145+
MasterProcedureEnv env = executor.getEnvironment();
146+
master.getTableStateManager().setTableState(TABLE_NAME, TableState.State.ENABLING);
147+
// Using a delayed spy ensures we hit the problem state while the table enable procedure
148+
// is waiting to run
149+
SnapshotProcedure snapshotProc =
150+
getDelayedOnSpecificStateSnapshotProcedure(new SnapshotProcedure(env, snapshotProto), env,
151+
MasterProcedureProtos.SnapshotState.SNAPSHOT_WRITE_SNAPSHOT_INFO);
152+
long snapshotProcId = executor.submitProcedure(snapshotProc);
153+
TEST_UTIL.waitFor(2000, () -> master.getProcedures().stream().map(Procedure::getProcId)
154+
.collect(Collectors.toSet()).contains(snapshotProcId));
155+
156+
TestEnableTableProcedure enableTable =
157+
new TestEnableTableProcedure(master.getMasterProcedureExecutor().getEnvironment(), TABLE_NAME,
158+
MasterProcedureProtos.EnableTableState.ENABLE_TABLE_SET_ENABLED_TABLE_STATE);
159+
executor.submitProcedure(enableTable);
160+
TEST_UTIL.waitFor(70000, // need to wait slightly longer than the spy proc (60_000)
161+
() -> master.getTableStateManager().getTableState(TABLE_NAME).isEnabled());
162+
}
163+
164+
// Needs to be publicly accessible for Procedure validation
165+
public static class TestEnableTableProcedure extends EnableTableProcedure {
166+
// Necessary for Procedure validation
167+
public TestEnableTableProcedure() {
168+
}
169+
170+
public TestEnableTableProcedure(MasterProcedureEnv env, TableName tableName,
171+
MasterProcedureProtos.EnableTableState state) {
172+
super(env, tableName);
173+
this.setNextState(state);
174+
}
175+
176+
}
139177
}

0 commit comments

Comments
 (0)