Skip to content

Commit ead2eee

Browse files
taklwuStephen Wu
authored andcommitted
HBASE-24833: Bootstrap should not delete the META table directory if it's not partial
Add a check on meta bootstrap to skip removing the meta table directory if ZK data does not exist when the HMaster restarts. Here, the existence of the clusterID in ZK indicates whether the meta is partial when we hit the INIT_META_WRITE_FS_LAYOUT state in InitMetaProcedure.
1 parent 17a0c2a commit ead2eee

File tree

5 files changed

+251
-3
lines changed

5 files changed

+251
-3
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,8 @@ public void run() {
463463
private final boolean maintenanceMode;
464464
static final String MAINTENANCE_MODE = "hbase.master.maintenance_mode";
465465

466+
private boolean isClusterRestartWithExistingZNodes;
467+
466468
// Cached clusterId on stand by masters to serve clusterID requests from clients.
467469
private final CachedClusterId cachedClusterId;
468470

@@ -915,6 +917,11 @@ private void finishActiveMasterInitialization(MonitoredTask status)
915917
this.tableDescriptors.getAll();
916918
}
917919

920+
// Check whether a cluster ID is already stored in a ZNode, and use that to tell whether the
921+
// cluster has been restarted with an existing ZooKeeper quorum.
922+
isClusterRestartWithExistingZNodes =
923+
ZKClusterId.readClusterIdZNode(this.zooKeeper) != null ? true : false;
924+
918925
// Publish cluster ID; set it in Master too. The superclass RegionServer does this later but
919926
// only after it has checked in with the Master. At least a few tests ask Master for clusterId
920927
// before it has called its run method and before RegionServer has done the reportForDuty.
@@ -3894,4 +3901,8 @@ public String getClusterId() {
38943901
public MetaRegionLocationCache getMetaRegionLocationCache() {
38953902
return this.metaRegionLocationCache;
38963903
}
3904+
3905+
public boolean isClusterRestartWithExistingZNodes() {
3906+
return isClusterRestartWithExistingZNodes;
3907+
}
38973908
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,4 +524,10 @@ default SplitWALManager getSplitWALManager(){
524524
* Run the ReplicationBarrierChore.
525525
*/
526526
void runReplicationBarrierCleaner();
527+
528+
/**
529+
* @return true if the master is restarting on top of existing ZNodes, otherwise false.
530+
*/
531+
boolean isClusterRestartWithExistingZNodes();
532+
527533
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,14 @@ public TableOperationType getTableOperationType() {
6767
return TableOperationType.CREATE;
6868
}
6969

70-
private static void writeFsLayout(Path rootDir, Configuration conf) throws IOException {
70+
private static void writeFsLayout(boolean isClusterRestartWithExistingZNodes,
71+
Path rootDir, Configuration conf) throws IOException {
7172
LOG.info("BOOTSTRAP: creating hbase:meta region");
7273
FileSystem fs = rootDir.getFileSystem(conf);
7374
Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME);
74-
if (fs.exists(tableDir) && !fs.delete(tableDir, true)) {
75+
// when entering the state of INIT_META_WRITE_FS_LAYOUT, we use existing zookeeper data to
76+
// tell if this is a partially created meta; if so, we should delete and recreate the meta table.
77+
if (isClusterRestartWithExistingZNodes && fs.exists(tableDir) && !fs.delete(tableDir, true)) {
7578
LOG.warn("Can not delete partial created meta table, continue...");
7679
}
7780
// Bootstrapping, make sure blockcache is off. Else, one will be
@@ -96,7 +99,8 @@ protected Flow executeFromState(MasterProcedureEnv env, InitMetaState state)
9699
case INIT_META_WRITE_FS_LAYOUT:
97100
Configuration conf = env.getMasterConfiguration();
98101
Path rootDir = CommonFSUtils.getRootDir(conf);
99-
writeFsLayout(rootDir, conf);
102+
writeFsLayout(env.getMasterServices().isClusterRestartWithExistingZNodes(), rootDir,
103+
conf);
100104
setNextState(InitMetaState.INIT_META_ASSIGN_META);
101105
return Flow.HAS_MORE_STATE;
102106
case INIT_META_ASSIGN_META:

hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,4 +482,9 @@ public List<RegionPlan> executeRegionPlansWithThrottling(List<RegionPlan> plans)
482482

483483
@Override
484484
public void runReplicationBarrierCleaner() {}
485+
486+
@Override
487+
public boolean isClusterRestartWithExistingZNodes() {
488+
return false;
489+
}
485490
}
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.master;
19+
20+
import static org.junit.Assert.assertEquals;
21+
import static org.junit.Assert.assertFalse;
22+
import static org.junit.Assert.assertTrue;
23+
24+
import java.io.IOException;
25+
import java.time.Duration;
26+
import java.util.List;
27+
28+
import org.apache.hadoop.fs.Path;
29+
import org.apache.hadoop.hbase.Cell;
30+
import org.apache.hadoop.hbase.HBaseClassTestRule;
31+
import org.apache.hadoop.hbase.HBaseTestingUtility;
32+
import org.apache.hadoop.hbase.HConstants;
33+
import org.apache.hadoop.hbase.MiniHBaseCluster;
34+
import org.apache.hadoop.hbase.ServerName;
35+
import org.apache.hadoop.hbase.TableName;
36+
import org.apache.hadoop.hbase.client.Get;
37+
import org.apache.hadoop.hbase.client.Put;
38+
import org.apache.hadoop.hbase.client.RegionInfo;
39+
import org.apache.hadoop.hbase.client.Result;
40+
import org.apache.hadoop.hbase.client.Table;
41+
import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
42+
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
43+
import org.apache.hadoop.hbase.regionserver.HRegionServer;
44+
import org.apache.hadoop.hbase.testclassification.LargeTests;
45+
import org.apache.hadoop.hbase.util.Bytes;
46+
import org.apache.hadoop.hbase.util.CommonFSUtils;
47+
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
48+
49+
import org.junit.ClassRule;
50+
import org.junit.Rule;
51+
import org.junit.Test;
52+
import org.junit.experimental.categories.Category;
53+
import org.junit.rules.TestName;
54+
55+
/**
56+
* Tests that store files in the data directory are reused when a cluster fails over to a new
57+
* set of region servers with different hostnames, with or without WALs and ZooKeeper ZNodes.
58+
* Every HBase system table and user table should be assignable normally after the restart.
59+
*/
60+
@Category({ LargeTests.class })
61+
public class TestRecreateCluster {
62+
/** HBase class-level test rule created for this test class. */
@ClassRule
63+
public static final HBaseClassTestRule CLASS_RULE =
64+
HBaseClassTestRule.forClass(TestRecreateCluster.class);
65+
66+
/** JUnit rule exposing the running test's name. NOTE(review): not referenced by the visible code. */
@Rule
67+
public TestName name = new TestName();
68+
69+
/** Shared testing utility; every test starts and shuts down its mini cluster through it. */
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
70+
/** Number of region servers requested when the mini cluster is started and restarted. */
private static final int NUM_RS = 3;
71+
/** Two minutes, expressed in milliseconds; used as the timeout for the waits in this class. */
private static final long TIMEOUT_MS = Duration.ofMinutes(2).toMillis();
72+
73+
@Test
74+
public void testRecreateCluster_UserTableDisabled_WithoutCleanupWALsAndZNodes()
75+
throws Exception {
76+
validateRecreateClusterWithUserDisabled(false, false);
77+
}
78+
79+
@Test
80+
public void testRecreateCluster_UserTableEnabled_WithoutCleanupWALsAndZNodes() throws Exception {
81+
validateRecreateClusterWithUserTableEnabled(false, false);
82+
}
83+
84+
private void validateRecreateClusterWithUserDisabled(boolean cleanupWALs,
85+
boolean cleanUpZNodes) throws Exception {
86+
TEST_UTIL.startMiniCluster(NUM_RS);
87+
try {
88+
TableName tableName = TableName.valueOf("t1");
89+
prepareDataBeforeRecreate(TEST_UTIL, tableName);
90+
TEST_UTIL.getAdmin().disableTable(tableName);
91+
TEST_UTIL.waitTableDisabled(tableName.getName());
92+
restartHBaseCluster(cleanupWALs, cleanUpZNodes);
93+
TEST_UTIL.getAdmin().enableTable(tableName);
94+
validateDataAfterRecreate(TEST_UTIL, tableName);
95+
} finally {
96+
TEST_UTIL.shutdownMiniCluster();
97+
}
98+
}
99+
100+
private void validateRecreateClusterWithUserTableEnabled(boolean cleanupWALs,
101+
boolean cleanUpZNodes) throws Exception {
102+
TEST_UTIL.startMiniCluster(NUM_RS);
103+
try {
104+
TableName tableName = TableName.valueOf("t1");
105+
prepareDataBeforeRecreate(TEST_UTIL, tableName);
106+
restartHBaseCluster(cleanupWALs, cleanUpZNodes);
107+
validateDataAfterRecreate(TEST_UTIL, tableName);
108+
} finally {
109+
TEST_UTIL.shutdownMiniCluster();
110+
}
111+
}
112+
113+
private void restartHBaseCluster(boolean cleanUpWALs, boolean cleanUpZnodes) throws Exception {
114+
// flush cache so that everything is on disk
115+
TEST_UTIL.getMiniHBaseCluster().flushcache();
116+
117+
List<ServerName> oldServers =
118+
TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList();
119+
120+
// make sure there is no procedures pending
121+
TEST_UTIL.waitFor(TIMEOUT_MS, () -> TEST_UTIL.getHBaseCluster().getMaster()
122+
.getProcedures().stream().filter(p -> p.isFinished()).findAny().isPresent());
123+
124+
// shutdown and delete data if needed
125+
Path walRootDirPath = TEST_UTIL.getMiniHBaseCluster().getMaster().getWALRootDir();
126+
Path rootDirPath = CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration());
127+
TEST_UTIL.shutdownMiniHBaseCluster();
128+
129+
if (cleanUpWALs) {
130+
TEST_UTIL.getDFSCluster().getFileSystem()
131+
.delete(new Path(rootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true);
132+
TEST_UTIL.getDFSCluster().getFileSystem()
133+
.delete(new Path(walRootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true);
134+
TEST_UTIL.getDFSCluster().getFileSystem()
135+
.delete(new Path(walRootDirPath, WALProcedureStore.MASTER_PROCEDURE_LOGDIR), true);
136+
137+
TEST_UTIL.getDFSCluster().getFileSystem()
138+
.delete(new Path(walRootDirPath, HConstants.HREGION_LOGDIR_NAME), true);
139+
TEST_UTIL.getDFSCluster().getFileSystem()
140+
.delete(new Path(walRootDirPath, HConstants.HREGION_OLDLOGDIR_NAME), true);
141+
}
142+
143+
if (cleanUpZnodes) {
144+
// delete all zk data
145+
// we cannot keep ZK data because it will hold the meta region states as open and
146+
// didn't submit a InitMetaProcedure
147+
ZKUtil.deleteChildrenRecursively(TEST_UTIL.getZooKeeperWatcher(),
148+
TEST_UTIL.getZooKeeperWatcher().getZNodePaths().baseZNode);
149+
TEST_UTIL.shutdownMiniZKCluster();
150+
TEST_UTIL.startMiniZKCluster();
151+
}
152+
153+
TEST_UTIL.restartHBaseCluster(NUM_RS);
154+
TEST_UTIL.waitFor(TIMEOUT_MS,
155+
() -> TEST_UTIL.getMiniHBaseCluster().getNumLiveRegionServers() == NUM_RS);
156+
157+
// make sure we have a new set of region servers with different hostnames and ports
158+
List<ServerName> newServers =
159+
TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList();
160+
assertFalse(newServers.stream().filter(newServer -> oldServers.contains(newServer)).findAny()
161+
.isPresent());
162+
}
163+
164+
private void prepareDataBeforeRecreate(
165+
HBaseTestingUtility testUtil, TableName tableName) throws Exception {
166+
Table table = testUtil.createTable(tableName, "f");
167+
Put put = new Put(Bytes.toBytes("r1"));
168+
put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"), Bytes.toBytes("v"));
169+
table.put(put);
170+
171+
ensureTableNotColocatedWithSystemTable(tableName, TableName.NAMESPACE_TABLE_NAME);
172+
}
173+
174+
private void ensureTableNotColocatedWithSystemTable(TableName userTable, TableName systemTable)
175+
throws IOException, InterruptedException {
176+
MiniHBaseCluster hbaseCluster = TEST_UTIL.getHBaseCluster();
177+
assertTrue("Please start more than 1 regionserver",
178+
hbaseCluster.getRegionServerThreads().size() > 1);
179+
180+
int userTableServerNum = getServerNumForTableWithOnlyOneRegion(userTable);
181+
int systemTableServerNum = getServerNumForTableWithOnlyOneRegion(systemTable);
182+
183+
if (userTableServerNum != systemTableServerNum) {
184+
// no-ops if user table and system are already on a different host
185+
return;
186+
}
187+
188+
int destServerNum = (systemTableServerNum + 1) % NUM_RS;
189+
assertTrue(systemTableServerNum != destServerNum);
190+
191+
HRegionServer systemTableServer = hbaseCluster.getRegionServer(systemTableServerNum);
192+
HRegionServer destServer = hbaseCluster.getRegionServer(destServerNum);
193+
assertTrue(!systemTableServer.equals(destServer));
194+
// make sure the dest server is live before moving region
195+
hbaseCluster.waitForRegionServerToStart(destServer.getServerName().getHostname(),
196+
destServer.getServerName().getPort(), TIMEOUT_MS);
197+
// move region of userTable to a different regionserver not co-located with system table
198+
TEST_UTIL.moveRegionAndWait(TEST_UTIL.getAdmin().getRegions(userTable).get(0),
199+
destServer.getServerName());
200+
}
201+
202+
private int getServerNumForTableWithOnlyOneRegion(TableName tableName) throws IOException {
203+
List<RegionInfo> tableRegionInfos = TEST_UTIL.getAdmin().getRegions(tableName);
204+
assertEquals(1, tableRegionInfos.size());
205+
return TEST_UTIL.getHBaseCluster()
206+
.getServerWith(tableRegionInfos.get(0).getRegionName());
207+
}
208+
209+
private void validateDataAfterRecreate(
210+
HBaseTestingUtility testUtil, TableName tableName) throws Exception {
211+
Table t1 = testUtil.getConnection().getTable(tableName);
212+
Get get = new Get(Bytes.toBytes("r1"));
213+
get.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"));
214+
Result result = t1.get(get);
215+
assertTrue(result.advance());
216+
Cell cell = result.current();
217+
assertEquals("v", Bytes.toString(cell.getValueArray(),
218+
cell.getValueOffset(), cell.getValueLength()));
219+
assertFalse(result.advance());
220+
}
221+
222+
}

0 commit comments

Comments
 (0)