Skip to content

Commit 19d8e33

Browse files
committed
HBASE-22686 ZkSplitLogWorkerCoordination doesn't allow a regionserver to pick up all of the split work it is capable of (#377)
Signed-off-by: Xu Cang <xcang@apache.org>
1 parent 25f9070 commit 19d8e33

File tree

2 files changed

+9
-77
lines changed

2 files changed

+9
-77
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -324,34 +324,10 @@ public boolean progress() {
324324
}
325325

326326
/**
327-
* This function calculates how many splitters this RS should create based on expected average
328-
* tasks per RS and the hard limit upper bound(maxConcurrentTasks) set by configuration. <br>
329-
* At any given time, a RS allows spawn MIN(Expected Tasks/RS, Hard Upper Bound)
330-
* @param numTasks total number of split tasks available
331-
* @return number of tasks this RS can grab
332-
*/
333-
private int getNumExpectedTasksPerRS(int numTasks) {
334-
// at lease one RS(itself) available
335-
int availableRSs = 1;
336-
try {
337-
List<String> regionServers =
338-
ZKUtil.listChildrenNoWatch(watcher, watcher.getZNodePaths().rsZNode);
339-
availableRSs = Math.max(availableRSs, (regionServers == null) ? 0 : regionServers.size());
340-
} catch (KeeperException e) {
341-
// do nothing
342-
LOG.debug("getAvailableRegionServers got ZooKeeper exception", e);
343-
}
344-
int expectedTasksPerRS = (numTasks / availableRSs) + ((numTasks % availableRSs == 0) ? 0 : 1);
345-
return Math.max(1, expectedTasksPerRS); // at least be one
346-
}
347-
348-
/**
349-
* @param expectedTasksPerRS Average number of tasks to be handled by each RS
350327
* @return true if more splitters are available, otherwise false.
351328
*/
352-
private boolean areSplittersAvailable(int expectedTasksPerRS) {
353-
return (Math.min(expectedTasksPerRS, maxConcurrentTasks)
354-
- this.tasksInProgress.get()) > 0;
329+
private boolean areSplittersAvailable() {
330+
return maxConcurrentTasks - tasksInProgress.get() > 0;
355331
}
356332

357333
/**
@@ -432,22 +408,25 @@ public void taskLoop() throws InterruptedException {
432408
}
433409
}
434410
int numTasks = paths.size();
435-
int expectedTasksPerRS = getNumExpectedTasksPerRS(numTasks);
436411
boolean taskGrabbed = false;
437412
for (int i = 0; i < numTasks; i++) {
438413
while (!shouldStop) {
439-
if (this.areSplittersAvailable(expectedTasksPerRS)) {
440-
LOG.debug("Current region server " + server.getServerName()
414+
if (this.areSplittersAvailable()) {
415+
if (LOG.isTraceEnabled()) {
416+
LOG.trace("Current region server " + server.getServerName()
441417
+ " is ready to take more tasks, will get task list and try grab tasks again.");
418+
}
442419
int idx = (i + offset) % paths.size();
443420
// don't call ZKSplitLog.getNodeName() because that will lead to
444421
// double encoding of the path name
445422
taskGrabbed |= grabTask(ZNodePaths.joinZNode(
446423
watcher.getZNodePaths().splitLogZNode, paths.get(idx)));
447424
break;
448425
} else {
449-
LOG.debug("Current region server " + server.getServerName() + " has "
426+
if (LOG.isTraceEnabled()) {
427+
LOG.trace("Current region server " + server.getServerName() + " has "
450428
+ this.tasksInProgress.get() + " tasks in progress and can't take more.");
429+
}
451430
Thread.sleep(100);
452431
}
453432
}

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitLogWorker.java

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -472,53 +472,6 @@ public void testAcquireMultiTasks() throws Exception {
472472
}
473473
}
474474

475-
/**
476-
* The test checks SplitLogWorker should not spawn more splitters than expected num of tasks per
477-
* RS
478-
* @throws Exception
479-
*/
480-
@Test
481-
public void testAcquireMultiTasksByAvgTasksPerRS() throws Exception {
482-
LOG.info("testAcquireMultiTasks");
483-
SplitLogCounters.resetCounters();
484-
final String TATAS = "tatas";
485-
final ServerName RS = ServerName.valueOf("rs,1,1");
486-
final ServerName RS2 = ServerName.valueOf("rs,1,2");
487-
final int maxTasks = 3;
488-
Configuration testConf = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
489-
testConf.setInt(HBASE_SPLIT_WAL_MAX_SPLITTER, maxTasks);
490-
RegionServerServices mockedRS = getRegionServer(RS);
491-
492-
// create two RS nodes
493-
String rsPath = ZNodePaths.joinZNode(zkw.getZNodePaths().rsZNode, RS.getServerName());
494-
zkw.getRecoverableZooKeeper().create(rsPath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
495-
rsPath = ZNodePaths.joinZNode(zkw.getZNodePaths().rsZNode, RS2.getServerName());
496-
zkw.getRecoverableZooKeeper().create(rsPath, null, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
497-
498-
for (int i = 0; i < maxTasks; i++) {
499-
zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS + i),
500-
new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1")).toByteArray(),
501-
Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
502-
}
503-
504-
SplitLogWorker slw = new SplitLogWorker(ds, testConf, mockedRS, neverEndingTask);
505-
slw.start();
506-
try {
507-
int acquiredTasks = 0;
508-
waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 2, WAIT_TIME);
509-
for (int i = 0; i < maxTasks; i++) {
510-
byte[] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS + i));
511-
SplitLogTask slt = SplitLogTask.parseFrom(bytes);
512-
if (slt.isOwned(RS)) {
513-
acquiredTasks++;
514-
}
515-
}
516-
assertEquals(2, acquiredTasks);
517-
} finally {
518-
stopSplitLogWorker(slw);
519-
}
520-
}
521-
522475
/**
523476
* Create a mocked region server service instance
524477
*/

0 commit comments

Comments
 (0)