|
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + * or more contributor license agreements. See the NOTICE file |
| 4 | + * distributed with this work for additional information |
| 5 | + * regarding copyright ownership. The ASF licenses this file |
| 6 | + * to you under the Apache License, Version 2.0 (the |
| 7 | + * "License"); you may not use this file except in compliance |
| 8 | + * with the License. You may obtain a copy of the License at |
| 9 | + * |
| 10 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | + * |
| 12 | + * Unless required by applicable law or agreed to in writing, software |
| 13 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | + * See the License for the specific language governing permissions and |
| 16 | + * limitations under the License. |
| 17 | + */ |
| 18 | +package org.apache.hadoop.hbase.master; |
| 19 | + |
| 20 | +import static org.junit.Assert.assertEquals; |
| 21 | +import static org.junit.Assert.assertFalse; |
| 22 | +import static org.junit.Assert.assertTrue; |
| 23 | + |
| 24 | +import java.io.IOException; |
| 25 | +import java.time.Duration; |
| 26 | +import java.util.List; |
| 27 | + |
| 28 | +import org.apache.hadoop.fs.Path; |
| 29 | +import org.apache.hadoop.hbase.Cell; |
| 30 | +import org.apache.hadoop.hbase.HBaseClassTestRule; |
| 31 | +import org.apache.hadoop.hbase.HBaseTestingUtility; |
| 32 | +import org.apache.hadoop.hbase.HConstants; |
| 33 | +import org.apache.hadoop.hbase.MiniHBaseCluster; |
| 34 | +import org.apache.hadoop.hbase.ServerName; |
| 35 | +import org.apache.hadoop.hbase.TableName; |
| 36 | +import org.apache.hadoop.hbase.client.Get; |
| 37 | +import org.apache.hadoop.hbase.client.Put; |
| 38 | +import org.apache.hadoop.hbase.client.RegionInfo; |
| 39 | +import org.apache.hadoop.hbase.client.Result; |
| 40 | +import org.apache.hadoop.hbase.client.Table; |
| 41 | +import org.apache.hadoop.hbase.master.region.MasterRegionFactory; |
| 42 | +import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; |
| 43 | +import org.apache.hadoop.hbase.regionserver.HRegionServer; |
| 44 | +import org.apache.hadoop.hbase.testclassification.LargeTests; |
| 45 | +import org.apache.hadoop.hbase.util.Bytes; |
| 46 | +import org.apache.hadoop.hbase.util.CommonFSUtils; |
| 47 | +import org.apache.hadoop.hbase.zookeeper.ZKUtil; |
| 48 | + |
| 49 | +import org.junit.ClassRule; |
| 50 | +import org.junit.Rule; |
| 51 | +import org.junit.Test; |
| 52 | +import org.junit.experimental.categories.Category; |
| 53 | +import org.junit.rules.TestName; |
| 54 | + |
| 55 | +/** |
| 56 | + * Test reuse storefiles within data directory when cluster failover with a set of new region |
| 57 | + * servers with different hostnames with or without WALs and Zookeeper ZNodes support. For any |
| 58 | + * hbase system table and user table can be assigned normally after cluster restart. |
| 59 | + */ |
| 60 | +@Category({ LargeTests.class }) |
| 61 | +public class TestRecreateCluster { |
| 62 | + @ClassRule |
| 63 | + public static final HBaseClassTestRule CLASS_RULE = |
| 64 | + HBaseClassTestRule.forClass(TestRecreateCluster.class); |
| 65 | + |
| 66 | + @Rule |
| 67 | + public TestName name = new TestName(); |
| 68 | + |
| 69 | + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); |
| 70 | + private static final int NUM_RS = 3; |
| 71 | + private static final long TIMEOUT_MS = Duration.ofMinutes(2).toMillis(); |
| 72 | + |
| 73 | + @Test |
| 74 | + public void testRecreateCluster_UserTableDisabled_WithoutCleanupWALsAndZNodes() |
| 75 | + throws Exception { |
| 76 | + validateRecreateClusterWithUserDisabled(false, false); |
| 77 | + } |
| 78 | + |
| 79 | + @Test |
| 80 | + public void testRecreateCluster_UserTableEnabled_WithoutCleanupWALsAndZNodes() throws Exception { |
| 81 | + validateRecreateClusterWithUserTableEnabled(false, false); |
| 82 | + } |
| 83 | + |
| 84 | + private void validateRecreateClusterWithUserDisabled(boolean cleanupWALs, |
| 85 | + boolean cleanUpZNodes) throws Exception { |
| 86 | + TEST_UTIL.startMiniCluster(NUM_RS); |
| 87 | + try { |
| 88 | + TableName tableName = TableName.valueOf("t1"); |
| 89 | + prepareDataBeforeRecreate(TEST_UTIL, tableName); |
| 90 | + TEST_UTIL.getAdmin().disableTable(tableName); |
| 91 | + TEST_UTIL.waitTableDisabled(tableName.getName()); |
| 92 | + restartHBaseCluster(cleanupWALs, cleanUpZNodes); |
| 93 | + TEST_UTIL.getAdmin().enableTable(tableName); |
| 94 | + validateDataAfterRecreate(TEST_UTIL, tableName); |
| 95 | + } finally { |
| 96 | + TEST_UTIL.shutdownMiniCluster(); |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + private void validateRecreateClusterWithUserTableEnabled(boolean cleanupWALs, |
| 101 | + boolean cleanUpZNodes) throws Exception { |
| 102 | + TEST_UTIL.startMiniCluster(NUM_RS); |
| 103 | + try { |
| 104 | + TableName tableName = TableName.valueOf("t1"); |
| 105 | + prepareDataBeforeRecreate(TEST_UTIL, tableName); |
| 106 | + restartHBaseCluster(cleanupWALs, cleanUpZNodes); |
| 107 | + validateDataAfterRecreate(TEST_UTIL, tableName); |
| 108 | + } finally { |
| 109 | + TEST_UTIL.shutdownMiniCluster(); |
| 110 | + } |
| 111 | + } |
| 112 | + |
| 113 | + private void restartHBaseCluster(boolean cleanUpWALs, boolean cleanUpZnodes) throws Exception { |
| 114 | + // flush cache so that everything is on disk |
| 115 | + TEST_UTIL.getMiniHBaseCluster().flushcache(); |
| 116 | + |
| 117 | + List<ServerName> oldServers = |
| 118 | + TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList(); |
| 119 | + |
| 120 | + // make sure there is no procedures pending |
| 121 | + TEST_UTIL.waitFor(TIMEOUT_MS, () -> TEST_UTIL.getHBaseCluster().getMaster() |
| 122 | + .getProcedures().stream().filter(p -> p.isFinished()).findAny().isPresent()); |
| 123 | + |
| 124 | + // shutdown and delete data if needed |
| 125 | + Path walRootDirPath = TEST_UTIL.getMiniHBaseCluster().getMaster().getWALRootDir(); |
| 126 | + Path rootDirPath = CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration()); |
| 127 | + TEST_UTIL.shutdownMiniHBaseCluster(); |
| 128 | + |
| 129 | + if (cleanUpWALs) { |
| 130 | + TEST_UTIL.getDFSCluster().getFileSystem() |
| 131 | + .delete(new Path(rootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true); |
| 132 | + TEST_UTIL.getDFSCluster().getFileSystem() |
| 133 | + .delete(new Path(walRootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true); |
| 134 | + TEST_UTIL.getDFSCluster().getFileSystem() |
| 135 | + .delete(new Path(walRootDirPath, WALProcedureStore.MASTER_PROCEDURE_LOGDIR), true); |
| 136 | + |
| 137 | + TEST_UTIL.getDFSCluster().getFileSystem() |
| 138 | + .delete(new Path(walRootDirPath, HConstants.HREGION_LOGDIR_NAME), true); |
| 139 | + TEST_UTIL.getDFSCluster().getFileSystem() |
| 140 | + .delete(new Path(walRootDirPath, HConstants.HREGION_OLDLOGDIR_NAME), true); |
| 141 | + } |
| 142 | + |
| 143 | + if (cleanUpZnodes) { |
| 144 | + // delete all zk data |
| 145 | + // we cannot keep ZK data because it will hold the meta region states as open and |
| 146 | + // didn't submit a InitMetaProcedure |
| 147 | + ZKUtil.deleteChildrenRecursively(TEST_UTIL.getZooKeeperWatcher(), |
| 148 | + TEST_UTIL.getZooKeeperWatcher().getZNodePaths().baseZNode); |
| 149 | + TEST_UTIL.shutdownMiniZKCluster(); |
| 150 | + TEST_UTIL.startMiniZKCluster(); |
| 151 | + } |
| 152 | + |
| 153 | + TEST_UTIL.restartHBaseCluster(NUM_RS); |
| 154 | + TEST_UTIL.waitFor(TIMEOUT_MS, |
| 155 | + () -> TEST_UTIL.getMiniHBaseCluster().getNumLiveRegionServers() == NUM_RS); |
| 156 | + |
| 157 | + // make sure we have a new set of region servers with different hostnames and ports |
| 158 | + List<ServerName> newServers = |
| 159 | + TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList(); |
| 160 | + assertFalse(newServers.stream().filter(newServer -> oldServers.contains(newServer)).findAny() |
| 161 | + .isPresent()); |
| 162 | + } |
| 163 | + |
| 164 | + private void prepareDataBeforeRecreate( |
| 165 | + HBaseTestingUtility testUtil, TableName tableName) throws Exception { |
| 166 | + Table table = testUtil.createTable(tableName, "f"); |
| 167 | + Put put = new Put(Bytes.toBytes("r1")); |
| 168 | + put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"), Bytes.toBytes("v")); |
| 169 | + table.put(put); |
| 170 | + |
| 171 | + ensureTableNotColocatedWithSystemTable(tableName, TableName.NAMESPACE_TABLE_NAME); |
| 172 | + } |
| 173 | + |
| 174 | + private void ensureTableNotColocatedWithSystemTable(TableName userTable, TableName systemTable) |
| 175 | + throws IOException, InterruptedException { |
| 176 | + MiniHBaseCluster hbaseCluster = TEST_UTIL.getHBaseCluster(); |
| 177 | + assertTrue("Please start more than 1 regionserver", |
| 178 | + hbaseCluster.getRegionServerThreads().size() > 1); |
| 179 | + |
| 180 | + int userTableServerNum = getServerNumForTableWithOnlyOneRegion(userTable); |
| 181 | + int systemTableServerNum = getServerNumForTableWithOnlyOneRegion(systemTable); |
| 182 | + |
| 183 | + if (userTableServerNum != systemTableServerNum) { |
| 184 | + // no-ops if user table and system are already on a different host |
| 185 | + return; |
| 186 | + } |
| 187 | + |
| 188 | + int destServerNum = (systemTableServerNum + 1) % NUM_RS; |
| 189 | + assertTrue(systemTableServerNum != destServerNum); |
| 190 | + |
| 191 | + HRegionServer systemTableServer = hbaseCluster.getRegionServer(systemTableServerNum); |
| 192 | + HRegionServer destServer = hbaseCluster.getRegionServer(destServerNum); |
| 193 | + assertTrue(!systemTableServer.equals(destServer)); |
| 194 | + // make sure the dest server is live before moving region |
| 195 | + hbaseCluster.waitForRegionServerToStart(destServer.getServerName().getHostname(), |
| 196 | + destServer.getServerName().getPort(), TIMEOUT_MS); |
| 197 | + // move region of userTable to a different regionserver not co-located with system table |
| 198 | + TEST_UTIL.moveRegionAndWait(TEST_UTIL.getAdmin().getRegions(userTable).get(0), |
| 199 | + destServer.getServerName()); |
| 200 | + } |
| 201 | + |
| 202 | + private int getServerNumForTableWithOnlyOneRegion(TableName tableName) throws IOException { |
| 203 | + List<RegionInfo> tableRegionInfos = TEST_UTIL.getAdmin().getRegions(tableName); |
| 204 | + assertEquals(1, tableRegionInfos.size()); |
| 205 | + return TEST_UTIL.getHBaseCluster() |
| 206 | + .getServerWith(tableRegionInfos.get(0).getRegionName()); |
| 207 | + } |
| 208 | + |
| 209 | + private void validateDataAfterRecreate( |
| 210 | + HBaseTestingUtility testUtil, TableName tableName) throws Exception { |
| 211 | + Table t1 = testUtil.getConnection().getTable(tableName); |
| 212 | + Get get = new Get(Bytes.toBytes("r1")); |
| 213 | + get.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c")); |
| 214 | + Result result = t1.get(get); |
| 215 | + assertTrue(result.advance()); |
| 216 | + Cell cell = result.current(); |
| 217 | + assertEquals("v", Bytes.toString(cell.getValueArray(), |
| 218 | + cell.getValueOffset(), cell.getValueLength())); |
| 219 | + assertFalse(result.advance()); |
| 220 | + } |
| 221 | + |
| 222 | +} |
0 commit comments