Skip to content

HBASE-26643 AssignmentManager should check for empty result returned … #4005

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> r
int numServers = servers == null ? 0 : servers.size();
if (numServers == 0) {
LOG.warn("Wanted to do round robin assignment but no servers to assign to");
return Collections.emptyMap();
return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions));
}

// TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
Expand Down Expand Up @@ -272,7 +272,7 @@ public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, Server
int numServers = servers == null ? 0 : servers.size();
if (numServers == 0) {
LOG.warn("Wanted to do retain assignment but no servers to assign to");
return Collections.emptyMap();
return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions.keySet()));
}

if (numServers == 1) { // Only one server, nothing fancy we can do here
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.StartTestingClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.AsyncAdmin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.metrics2.impl.JmxCacheBuster;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState;

@Category({ MasterTests.class, MediumTests.class })
public class TestCreateTableNoRegionServer {

@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestCreateTableNoRegionServer.class);

private static final Logger LOG = LoggerFactory.getLogger(TestCreateTableNoRegionServer.class);

private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

private static CountDownLatch ARRIVE;

private static CountDownLatch RESUME;

public static final class HMasterForTest extends HMaster {

public HMasterForTest(Configuration conf) throws IOException {
super(conf);
}

private boolean isInAssignRegionsState() {
try {
for (StackTraceElement e : Thread.currentThread().getStackTrace()) {
if (e.getClassName().equals(CreateTableProcedure.class.getName()) &&
e.getMethodName().equals("executeFromState")) {
for (Procedure<?> proc : getProcedures()) {
if (proc instanceof CreateTableProcedure && ((CreateTableProcedure) proc)
.getCurrentStateId() == CreateTableState.CREATE_TABLE_ASSIGN_REGIONS_VALUE) {
return true;
}
}
}
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return false;
}

@Override
public AssignmentManager getAssignmentManager() {
if (ARRIVE != null && isInAssignRegionsState()) {
ARRIVE.countDown();
ARRIVE = null;
try {
RESUME.await();
} catch (InterruptedException e) {
}
}
return super.getAssignmentManager();
}
}

@BeforeClass
public static void setUp() throws Exception {
UTIL.startMiniCluster(
StartTestingClusterOption.builder().masterClass(HMasterForTest.class).build());
// this may cause dead lock if there is no live region server and want to start a new server.
// In JmxCacheBuster we will reinitialize the metrics system so it will get some metrics which
// will need to access meta, since there is no region server, the request will hang there for a
// long time while holding the lock of MetricsSystemImpl, but when start a new region server, we
// also need to update metrics in handleReportForDutyResponse, since we are all in the same
// process and uses the same metrics instance, we hit dead lock.
JmxCacheBuster.stop();
}

@AfterClass
public static void tearDown() throws Exception {
UTIL.shutdownMiniCluster();
}

@Test
public void testCreate() throws Exception {
TableDescriptor td = TableDescriptorBuilder.newBuilder(TableName.valueOf("test"))
.setColumnFamily(ColumnFamilyDescriptorBuilder.of("f1")).build();
AsyncAdmin admin = UTIL.getAsyncConnection().getAdmin();
ARRIVE = new CountDownLatch(1);
RESUME = new CountDownLatch(1);
CompletableFuture<Void> future = admin.createTable(td);
ARRIVE.await();

UTIL.getMiniHBaseCluster().stopRegionServer(0).join();

// make sure we the region server is done.
UTIL.waitFor(30000,
() -> UTIL.getMiniHBaseCluster().getMaster().getServerManager().getOnlineServers().isEmpty());
RESUME.countDown();

Thread.sleep(10000);
// the procedure should still be in the CREATE_TABLE_ASSIGN_REGIONS state, but here, we just
// warn it as it may cause more serious problem later.
for (Procedure<?> proc : UTIL.getMiniHBaseCluster().getMaster().getProcedures()) {
if (proc instanceof CreateTableProcedure && !proc.isFinished() &&
((CreateTableProcedure) proc)
.getCurrentStateId() != CreateTableState.CREATE_TABLE_ASSIGN_REGIONS_VALUE) {
LOG.warn("Create table procedure {} assigned regions without a region server!", proc);
}
}
UTIL.getMiniHBaseCluster().startRegionServer();
// the creation should finally be done
future.get(30, TimeUnit.SECONDS);
// make sure we could put to the table
try (Table table = UTIL.getConnection().getTableBuilder(td.getTableName(), null)
.setOperationTimeout(5000).build()) {
table.put(new Put(Bytes.toBytes(0)).addColumn(td.getColumnFamilies()[0].getName(),
Bytes.toBytes("q"), Bytes.toBytes(0)));
}
}
}