Skip to content

Commit 52eb893

Browse files
luffygodsaintstack
authored andcommitted
HBASE-22169 Open region failed cause memory leak
Signed-off-by: stack <stack@apache.org>
1 parent 0c8dc5d commit 52eb893

File tree

3 files changed

+77
-17
lines changed

3 files changed

+77
-17
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7340,23 +7340,31 @@ public static Region openHRegion(final Region other, final CancelableProgressabl
73407340
*/
73417341
protected HRegion openHRegion(final CancelableProgressable reporter)
73427342
throws IOException {
7343-
// Refuse to open the region if we are missing local compression support
7344-
checkCompressionCodecs();
7345-
LOG.debug("checking encryption for " + this.getRegionInfo().getEncodedName());
7346-
// Refuse to open the region if encryption configuration is incorrect or
7347-
// codec support is missing
7348-
checkEncryption();
7349-
// Refuse to open the region if a required class cannot be loaded
7350-
LOG.debug("checking classloading for " + this.getRegionInfo().getEncodedName());
7351-
checkClassLoading();
7352-
this.openSeqNum = initialize(reporter);
7353-
this.mvcc.advanceTo(openSeqNum);
7354-
// The openSeqNum must be increased every time when a region is assigned, as we rely on it to
7355-
// determine whether a region has been successfully reopened. So here we always write open
7356-
// marker, even if the table is read only.
7357-
if (wal != null && getRegionServerServices() != null &&
7358-
RegionReplicaUtil.isDefaultReplica(getRegionInfo())) {
7359-
writeRegionOpenMarker(wal, openSeqNum);
7343+
try {
7344+
// Refuse to open the region if we are missing local compression support
7345+
checkCompressionCodecs();
7346+
// Refuse to open the region if encryption configuration is incorrect or
7347+
// codec support is missing
7348+
LOG.debug("checking encryption for " + this.getRegionInfo().getEncodedName());
7349+
checkEncryption();
7350+
// Refuse to open the region if a required class cannot be loaded
7351+
LOG.debug("checking classloading for " + this.getRegionInfo().getEncodedName());
7352+
checkClassLoading();
7353+
this.openSeqNum = initialize(reporter);
7354+
this.mvcc.advanceTo(openSeqNum);
7355+
// The openSeqNum must be increased every time when a region is assigned, as we rely on it to
7356+
// determine whether a region has been successfully reopened. So here we always write open
7357+
// marker, even if the table is read only.
7358+
if (wal != null && getRegionServerServices() != null &&
7359+
RegionReplicaUtil.isDefaultReplica(getRegionInfo())) {
7360+
writeRegionOpenMarker(wal, openSeqNum);
7361+
}
7362+
} catch(Throwable t) {
7363+
// By coprocessor path wrong region will open failed,
7364+
// MetricsRegionWrapperImpl is already init and not close,
7365+
// add region close when open failed
7366+
this.close();
7367+
throw t;
73607368
}
73617369
return this;
73627370
}

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141

4242
import java.io.IOException;
4343
import java.io.InterruptedIOException;
44+
import java.lang.reflect.Field;
4445
import java.math.BigDecimal;
4546
import java.nio.charset.StandardCharsets;
4647
import java.security.PrivilegedExceptionAction;
@@ -52,11 +53,14 @@
5253
import java.util.NavigableMap;
5354
import java.util.Objects;
5455
import java.util.TreeMap;
56+
import java.util.concurrent.BlockingQueue;
5557
import java.util.concurrent.Callable;
5658
import java.util.concurrent.CountDownLatch;
5759
import java.util.concurrent.ExecutorService;
5860
import java.util.concurrent.Executors;
5961
import java.util.concurrent.Future;
62+
import java.util.concurrent.ScheduledExecutorService;
63+
import java.util.concurrent.ThreadPoolExecutor;
6064
import java.util.concurrent.TimeUnit;
6165
import java.util.concurrent.atomic.AtomicBoolean;
6266
import java.util.concurrent.atomic.AtomicInteger;
@@ -161,6 +165,8 @@
161165
import org.apache.hadoop.hbase.wal.WALProvider;
162166
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
163167
import org.apache.hadoop.hbase.wal.WALSplitUtil;
168+
import org.apache.hadoop.hbase.wal.WALSplitter;
169+
import org.apache.hadoop.metrics2.MetricsExecutor;
164170
import org.junit.After;
165171
import org.junit.Assert;
166172
import org.junit.Before;
@@ -6281,6 +6287,45 @@ public void testBulkLoadReplicationEnabled() throws IOException {
62816287
getCoprocessors().contains(ReplicationObserver.class.getSimpleName()));
62826288
}
62836289

6290+
// make sure region is success close when coprocessor wrong region open failed
6291+
@Test
6292+
public void testOpenRegionFailedMemoryLeak() throws Exception {
6293+
final ServerName serverName = ServerName.valueOf("testOpenRegionFailed", 100, 42);
6294+
final RegionServerServices rss = spy(TEST_UTIL.createMockRegionServerService(serverName));
6295+
6296+
HTableDescriptor htd
6297+
= new HTableDescriptor(TableName.valueOf("testOpenRegionFailed"));
6298+
htd.addFamily(new HColumnDescriptor(fam1));
6299+
htd.setValue("COPROCESSOR$1", "hdfs://test/test.jar|test||");
6300+
6301+
HRegionInfo hri = new HRegionInfo(htd.getTableName(),
6302+
HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY);
6303+
ScheduledExecutorService executor = CompatibilitySingletonFactory.
6304+
getInstance(MetricsExecutor.class).getExecutor();
6305+
for (int i = 0; i < 20 ; i++) {
6306+
try {
6307+
HRegion.openHRegion(hri, htd, rss.getWAL(hri),
6308+
TEST_UTIL.getConfiguration(), rss, null);
6309+
}catch(Throwable t){
6310+
LOG.info("Expected exception, continue");
6311+
}
6312+
}
6313+
TimeUnit.SECONDS.sleep(MetricsRegionWrapperImpl.PERIOD);
6314+
Field[] fields = ThreadPoolExecutor.class.getDeclaredFields();
6315+
boolean found = false;
6316+
for(Field field : fields){
6317+
if(field.getName().equals("workQueue")){
6318+
field.setAccessible(true);
6319+
BlockingQueue<Runnable> workQueue = (BlockingQueue<Runnable>)field.get(executor);
6320+
//there are still two task not cancel, can not cause to memory lack
6321+
Assert.assertTrue("ScheduledExecutor#workQueue should equals 2, now is " +
6322+
workQueue.size() + ", please check region is close", 2 == workQueue.size());
6323+
found = true;
6324+
}
6325+
}
6326+
Assert.assertTrue("can not find workQueue, test failed", found);
6327+
}
6328+
62846329
/**
62856330
* The same as HRegion class, the only difference is that instantiateHStore will
62866331
* create a different HStore - HStoreForTesting. [HBASE-8518]

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestOpenSeqNumUnexpectedIncrease.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import static org.junit.Assert.assertEquals;
2121

2222
import java.io.IOException;
23+
import java.util.List;
24+
import java.util.Map;
2325
import java.util.concurrent.atomic.AtomicInteger;
2426
import org.apache.hadoop.conf.Configuration;
2527
import org.apache.hadoop.fs.FileSystem;
@@ -74,6 +76,11 @@ protected void writeRegionOpenMarker(WAL wal, long openSeqId) throws IOException
7476
throw new IOException("Inject error for testing");
7577
}
7678
}
79+
80+
public Map<byte[], List<HStoreFile>> close() throws IOException {
81+
//skip close
82+
return null;
83+
}
7784
}
7885

7986
@BeforeClass

0 commit comments

Comments
 (0)