Skip to content

HBASE-26712 Balancer encounters NPE in rare case #4112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ public class HMaster extends HRegionServer implements MasterServices {

private LoadBalancer balancer;
private BalancerChore balancerChore;
private static boolean disableBalancerChoreForTest = false;
private RegionNormalizerManager regionNormalizerManager;
private ClusterStatusChore clusterStatusChore;
private ClusterStatusPublisher clusterStatusPublisherChore = null;
Expand Down Expand Up @@ -1090,7 +1091,9 @@ private void finishActiveMasterInitialization(MonitoredTask status)
this.clusterStatusChore = new ClusterStatusChore(this, balancer);
getChoreService().scheduleChore(clusterStatusChore);
this.balancerChore = new BalancerChore(this);
getChoreService().scheduleChore(balancerChore);
if (!disableBalancerChoreForTest) {
getChoreService().scheduleChore(balancerChore);
}
if (regionNormalizerManager != null) {
getChoreService().scheduleChore(regionNormalizerManager.getRegionNormalizerChore());
}
Expand Down Expand Up @@ -4036,4 +4039,23 @@ MasterRegion getMasterRegion() {
public Collection<ServerName> getLiveRegionServers() {
return regionServerTracker.getRegionServers();
}

@RestrictedApi(explanation = "Should only be called in tests", link = "",
allowedOnPath = ".*/src/test/.*")
void setLoadBalancer(LoadBalancer loadBalancer) {
this.balancer = loadBalancer;
}

@RestrictedApi(explanation = "Should only be called in tests", link = "",
allowedOnPath = ".*/src/test/.*")
void setAssignmentManager(AssignmentManager assignmentManager) {
this.assignmentManager = assignmentManager;
}

@RestrictedApi(explanation = "Should only be called in tests", link = "",
allowedOnPath = ".*/src/test/.*")
static void setDisableBalancerChoreForTest(boolean disable) {
disableBalancerChoreForTest = disable;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -823,9 +823,9 @@ public Future<byte[]> moveAsync(RegionPlan regionPlan) throws HBaseIOException {
public Future<byte[]> balance(RegionPlan regionPlan) throws HBaseIOException {
ServerName current =
this.getRegionStates().getRegionAssignments().get(regionPlan.getRegionInfo());
if (!current.equals(regionPlan.getSource())) {
if (current == null || !current.equals(regionPlan.getSource())) {
LOG.debug("Skip region plan {}, source server not match, current region location is {}",
regionPlan, current);
regionPlan, current == null ? "(null)" : current);
return null;
}
return moveAsync(regionPlan);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;

@Category({ MasterTests.class, MediumTests.class })
public class TestMasterBalancerNPE {

@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestMasterBalancerNPE.class);

private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static final byte[] FAMILYNAME = Bytes.toBytes("fam");
@Rule
public TestName name = new TestName();

@Before
public void setupConfiguration() {
/**
* Make {@link BalancerChore} not run,so does not disrupt the test.
*/
HMaster.setDisableBalancerChoreForTest(true);
TEST_UTIL.getConfiguration().set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
MyLoadBalancer.class.getName());
}

@After
public void shutdown() throws Exception {
HMaster.setDisableBalancerChoreForTest(false);
TEST_UTIL.getConfiguration().set(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
LoadBalancerFactory.getDefaultLoadBalancerClass().getName());
TEST_UTIL.shutdownMiniCluster();
}

/**
* This test is for HBASE-26712, to make the region is unassigned just before
* {@link AssignmentManager#balance} is invoked on the region.
*/
@Test
public void testBalancerNPE() throws Exception {
TEST_UTIL.startMiniCluster(2);
TEST_UTIL.getAdmin().balancerSwitch(false, true);
TableName tableName = createTable(name.getMethodName());
final HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
List<RegionInfo> regionInfos = TEST_UTIL.getAdmin().getRegions(tableName);
assertTrue(regionInfos.size() == 1);
final ServerName serverName1 =
TEST_UTIL.getMiniHBaseCluster().getRegionServer(0).getServerName();
final ServerName serverName2 =
TEST_UTIL.getMiniHBaseCluster().getRegionServer(1).getServerName();

final RegionInfo regionInfo = regionInfos.get(0);

MyLoadBalancer loadBalancer = (MyLoadBalancer)master.getLoadBalancer();
MyLoadBalancer spiedLoadBalancer = Mockito.spy(loadBalancer);
final AtomicReference<RegionPlan> regionPlanRef = new AtomicReference<RegionPlan>();

/**
* Mock {@link StochasticLoadBalancer#balanceTable} to return the {@link RegionPlan} to move the
* only region to the other RegionServer.
*/
Mockito.doAnswer((InvocationOnMock invocation) -> {
@SuppressWarnings("unchecked")
Map<ServerName, List<RegionInfo>> regionServerNameToRegionInfos =
(Map<ServerName, List<RegionInfo>>) invocation.getArgument(1);


List<ServerName> assignedRegionServerNames = new ArrayList<ServerName>();
for (Map.Entry<ServerName, List<RegionInfo>> entry : regionServerNameToRegionInfos
.entrySet()) {
if (entry.getValue()!= null) {
entry.getValue().stream().forEach((reginInfo) -> {
if(reginInfo.getTable().equals(tableName)) {assignedRegionServerNames.add(entry.getKey());}});
}
}
assertTrue(assignedRegionServerNames.size() == 1);
ServerName assignedRegionServerName = assignedRegionServerNames.get(0);
ServerName notAssignedRegionServerName =
assignedRegionServerName.equals(serverName1) ? serverName2 : serverName1;
RegionPlan regionPlan =
new RegionPlan(regionInfo, assignedRegionServerName, notAssignedRegionServerName);
regionPlanRef.set(regionPlan);
return Arrays.asList(regionPlan);
}).when(spiedLoadBalancer).balanceTable(Mockito.eq(HConstants.ENSEMBLE_TABLE_NAME),
Mockito.anyMap());

AssignmentManager assignmentManager = master.getAssignmentManager();
final AssignmentManager spiedAssignmentManager = Mockito.spy(assignmentManager);
final CyclicBarrier cyclicBarrier = new CyclicBarrier(2);

/**
* Override {@link AssignmentManager#balance} to invoke real {@link AssignmentManager#balance}
* after the region is successfully unassigned.
*/
Mockito.doAnswer((InvocationOnMock invocation) -> {
RegionPlan regionPlan = invocation.getArgument(0);
RegionPlan referedRegionPlan = regionPlanRef.get();
assertTrue(referedRegionPlan != null);
if (referedRegionPlan.equals(regionPlan)) {
/**
* To make {@link AssignmentManager#unassign} could be invoked just before
* {@link AssignmentManager#balance} is invoked.
*/
cyclicBarrier.await();
/**
* After {@link AssignmentManager#unassign} is completed,we could invoke
* {@link AssignmentManager#balance}.
*/
cyclicBarrier.await();
}
/**
* Before HBASE-26712,here may throw NPE.
*/
return invocation.callRealMethod();
}).when(spiedAssignmentManager).balance(Mockito.any());


try {
final AtomicReference<Throwable> exceptionRef = new AtomicReference<Throwable>(null);
Thread unassignThread = new Thread(() -> {
try {
/**
* To invoke {@link AssignmentManager#unassign} just before
* {@link AssignmentManager#balance} is invoked.
*/
cyclicBarrier.await();
spiedAssignmentManager.unassign(regionInfo);
assertTrue(spiedAssignmentManager.getRegionStates().getRegionAssignments()
.get(regionInfo) == null);
/**
* After {@link AssignmentManager#unassign} is completed,we could invoke
* {@link AssignmentManager#balance}.
*/
cyclicBarrier.await();
} catch (Exception e) {
exceptionRef.set(e);
}
});
unassignThread.setName("UnassignThread");
unassignThread.start();

master.setLoadBalancer(spiedLoadBalancer);
master.setAssignmentManager(spiedAssignmentManager);
/**
* enable balance
*/
TEST_UTIL.getAdmin().balancerSwitch(true, false);
/**
* Before HBASE-26712,here invokes {@link AssignmentManager#balance(RegionPlan)}
* which may throw NPE.
*/
master.balanceOrUpdateMetrics();

unassignThread.join();
assertTrue(exceptionRef.get() == null);
} finally {
master.setLoadBalancer(loadBalancer);
master.setAssignmentManager(assignmentManager);
}
}

private TableName createTable(String table) throws IOException {
TableName tableName = TableName.valueOf(table);
TEST_UTIL.createTable(tableName, FAMILYNAME);
return tableName;
}

/**
* Define this class because the test needs to override
* {@link StochasticLoadBalancer#balanceTable}, which is protected.
*/
static class MyLoadBalancer extends StochasticLoadBalancer{
@Override
protected List<RegionPlan> balanceTable(TableName tableName,
Map<ServerName, List<RegionInfo>> loadOfOneTable) {
return super.balanceTable(tableName, loadOfOneTable);
}
}
}