Skip to content

Commit 1fa9a8c

Browse files
Karthik PalanisamyguangxuCheng
authored andcommitted
HBASE-23237 Prevent Negative values in metrics requestsPerSecond (#865)
Signed-off-by: Guangxu Cheng <gxcheng@apache.org> Signed-off-by: Josh Elser <elserj@apache.org>
1 parent ce34d89 commit 1fa9a8c

File tree

3 files changed

+146
-19
lines changed

3 files changed

+146
-19
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ public class HRegionServer extends HasThread implements
379379
public static final String REGIONSERVER = "regionserver";
380380

381381
private MetricsRegionServer metricsRegionServer;
382+
MetricsRegionServerWrapperImpl metricsRegionServerImpl;
382383
private SpanReceiverHost spanReceiverHost;
383384

384385
/**
@@ -1558,8 +1559,9 @@ protected void handleReportForDutyResponse(final RegionServerStartupResponse c)
15581559
// Init in here rather than in constructor after thread name has been set
15591560
final MetricsTable metricsTable =
15601561
new MetricsTable(new MetricsTableWrapperAggregateImpl(this));
1561-
this.metricsRegionServer = new MetricsRegionServer(
1562-
new MetricsRegionServerWrapperImpl(this), conf, metricsTable);
1562+
this.metricsRegionServerImpl = new MetricsRegionServerWrapperImpl(this);
1563+
this.metricsRegionServer = new MetricsRegionServer(metricsRegionServerImpl,
1564+
conf, metricsTable);
15631565
// Now that we have a metrics source, start the pause monitor
15641566
this.pauseMonitor = new JvmPauseMonitor(conf, getMetrics().getMetricsSource());
15651567
pauseMonitor.start();
@@ -3240,6 +3242,7 @@ public HRegion getRegion(final String encodedRegionName) {
32403242
@Override
32413243
public boolean removeRegion(final HRegion r, ServerName destination) {
32423244
HRegion toReturn = this.onlineRegions.remove(r.getRegionInfo().getEncodedName());
3245+
metricsRegionServerImpl.requestsCountCache.remove(r.getRegionInfo().getEncodedName());
32433246
if (destination != null) {
32443247
long closeSeqNum = r.getMaxFlushedSeqId();
32453248
if (closeSeqNum == HConstants.NO_SEQNUM) {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,13 @@
1818
package org.apache.hadoop.hbase.regionserver;
1919

2020
import java.io.IOException;
21+
import java.util.ArrayList;
2122
import java.util.Collection;
2223
import java.util.List;
24+
import java.util.Map;
2325
import java.util.OptionalDouble;
2426
import java.util.OptionalLong;
27+
import java.util.concurrent.ConcurrentHashMap;
2528
import java.util.concurrent.ScheduledExecutorService;
2629
import java.util.concurrent.TimeUnit;
2730

@@ -113,6 +116,8 @@ class MetricsRegionServerWrapperImpl
113116
private volatile long mobFileCacheCount = 0;
114117
private volatile long blockedRequestsCount = 0L;
115118
private volatile long averageRegionSize = 0L;
119+
protected final Map<String, ArrayList<Long>>
120+
requestsCountCache = new ConcurrentHashMap<String, ArrayList<Long>>();
116121

117122
private ScheduledExecutorService executor;
118123
private Runnable runnable;
@@ -652,9 +657,6 @@ public double getMobFileCacheHitPercent() {
652657
public class RegionServerMetricsWrapperRunnable implements Runnable {
653658

654659
private long lastRan = 0;
655-
private long lastRequestCount = 0;
656-
private long lastReadRequestsCount = 0;
657-
private long lastWriteRequestsCount = 0;
658660

659661
@Override
660662
synchronized public void run() {
@@ -696,7 +698,40 @@ synchronized public void run() {
696698
long tempMobScanCellsSize = 0;
697699
long tempBlockedRequestsCount = 0;
698700
int regionCount = 0;
701+
702+
long currentReadRequestsCount = 0;
703+
long currentWriteRequestsCount = 0;
704+
long lastReadRequestsCount = 0;
705+
long lastWriteRequestsCount = 0;
706+
long readRequestsDelta = 0;
707+
long writeRequestsDelta = 0;
708+
long totalReadRequestsDelta = 0;
709+
long totalWriteRequestsDelta = 0;
710+
String encodedRegionName;
699711
for (HRegion r : regionServer.getOnlineRegionsLocalContext()) {
712+
encodedRegionName = r.getRegionInfo().getEncodedName();
713+
currentReadRequestsCount = r.getReadRequestsCount();
714+
currentWriteRequestsCount = r.getWriteRequestsCount();
715+
if (requestsCountCache.containsKey(encodedRegionName)) {
716+
lastReadRequestsCount = requestsCountCache.get(encodedRegionName).get(0);
717+
lastWriteRequestsCount = requestsCountCache.get(encodedRegionName).get(1);
718+
readRequestsDelta = currentReadRequestsCount - lastReadRequestsCount;
719+
writeRequestsDelta = currentWriteRequestsCount - lastWriteRequestsCount;
720+
totalReadRequestsDelta += readRequestsDelta;
721+
totalWriteRequestsDelta += writeRequestsDelta;
722+
//Update cache for our next comparision
723+
requestsCountCache.get(encodedRegionName).set(0,currentReadRequestsCount);
724+
requestsCountCache.get(encodedRegionName).set(1,currentWriteRequestsCount);
725+
} else {
726+
// List[0] -> readRequestCount
727+
// List[1] -> writeRequestCount
728+
ArrayList<Long> requests = new ArrayList<Long>(2);
729+
requests.add(currentReadRequestsCount);
730+
requests.add(currentWriteRequestsCount);
731+
requestsCountCache.put(encodedRegionName, requests);
732+
totalReadRequestsDelta += currentReadRequestsCount;
733+
totalWriteRequestsDelta += currentWriteRequestsCount;
734+
}
700735
tempNumMutationsWithoutWAL += r.getNumMutationsWithoutWAL();
701736
tempDataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL();
702737
tempReadRequestsCount += r.getReadRequestsCount();
@@ -783,25 +818,14 @@ synchronized public void run() {
783818
}
784819
// If we've time traveled keep the last requests per second.
785820
if ((currentTime - lastRan) > 0) {
786-
long currentRequestCount = getTotalRowActionRequestCount();
787-
requestsPerSecond = (currentRequestCount - lastRequestCount) /
821+
requestsPerSecond = (totalReadRequestsDelta + totalWriteRequestsDelta) /
788822
((currentTime - lastRan) / 1000.0);
789-
lastRequestCount = currentRequestCount;
790-
791-
long intervalReadRequestsCount = tempReadRequestsCount - lastReadRequestsCount;
792-
long intervalWriteRequestsCount = tempWriteRequestsCount - lastWriteRequestsCount;
793823

794-
double readRequestsRatePerMilliSecond = ((double)intervalReadRequestsCount/
795-
(double)period);
796-
double writeRequestsRatePerMilliSecond = ((double)intervalWriteRequestsCount/
797-
(double)period);
824+
double readRequestsRatePerMilliSecond = (double)totalReadRequestsDelta / period;
825+
double writeRequestsRatePerMilliSecond = (double)totalWriteRequestsDelta / period;
798826

799827
readRequestsRatePerSecond = readRequestsRatePerMilliSecond * 1000.0;
800828
writeRequestsRatePerSecond = writeRequestsRatePerMilliSecond * 1000.0;
801-
802-
lastReadRequestsCount = tempReadRequestsCount;
803-
lastWriteRequestsCount = tempWriteRequestsCount;
804-
805829
}
806830
lastRan = currentTime;
807831

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.apache.hadoop.hbase.regionserver;
17+
18+
19+
import java.io.IOException;
20+
import org.apache.hadoop.conf.Configuration;
21+
import org.apache.hadoop.hbase.HBaseClassTestRule;
22+
import org.apache.hadoop.hbase.HBaseTestingUtility;
23+
import org.apache.hadoop.hbase.HConstants;
24+
import org.apache.hadoop.hbase.ServerName;
25+
import org.apache.hadoop.hbase.TableName;
26+
import org.apache.hadoop.hbase.client.Admin;
27+
import org.apache.hadoop.hbase.client.Table;
28+
import org.apache.hadoop.hbase.testclassification.MediumTests;
29+
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
30+
import org.junit.AfterClass;
31+
import org.junit.Assert;
32+
import org.junit.BeforeClass;
33+
import org.junit.ClassRule;
34+
import org.junit.Test;
35+
import org.junit.experimental.categories.Category;
36+
37+
/**
38+
* Validate requestsPerSecond metric.
39+
*/
40+
@Category({ RegionServerTests.class, MediumTests.class })
41+
public class TestRequestsPerSecondMetric {
42+
43+
@ClassRule
44+
public static final HBaseClassTestRule CLASS_RULE =
45+
HBaseClassTestRule.forClass(TestRequestsPerSecondMetric.class);
46+
47+
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
48+
private static final long METRICS_PERIOD = 2000L;
49+
private static Configuration conf;
50+
51+
52+
@BeforeClass
53+
public static void setup() throws Exception {
54+
conf = UTIL.getConfiguration();
55+
conf.setLong(HConstants.REGIONSERVER_METRICS_PERIOD, METRICS_PERIOD);
56+
UTIL.startMiniCluster(1);
57+
}
58+
59+
@AfterClass
60+
public static void teardown() throws Exception {
61+
UTIL.shutdownMiniCluster();
62+
}
63+
64+
65+
@Test
66+
/**
67+
* This test will confirm no negative value in requestsPerSecond metric during any region
68+
* transition(close region/remove region/move region).
69+
* Firstly, load 2000 random rows for 25 regions and will trigger a metric.
70+
* Now, metricCache will have a current read and write requests count.
71+
* Next, we disable a table and all of its 25 regions will be closed.
72+
* As part of region close, his metric will also be removed from metricCache.
73+
* prior to HBASE-23237, we do not remove/reset his metric so we incorrectly compute
74+
* (currentRequestCount - lastRequestCount) which result into negative value.
75+
*
76+
* @throws IOException
77+
* @throws InterruptedException
78+
*/
79+
public void testNoNegativeSignAtRequestsPerSecond() throws IOException, InterruptedException {
80+
final TableName TABLENAME = TableName.valueOf("t");
81+
final String FAMILY = "f";
82+
Admin admin = UTIL.getAdmin();
83+
UTIL.createMultiRegionTable(TABLENAME, FAMILY.getBytes(),25);
84+
Table table = admin.getConnection().getTable(TABLENAME);
85+
ServerName serverName = admin.getRegionServers().iterator().next();
86+
HRegionServer regionServer = UTIL.getMiniHBaseCluster().getRegionServer(serverName);
87+
MetricsRegionServerWrapperImpl metricsWrapper =
88+
new MetricsRegionServerWrapperImpl(regionServer);
89+
MetricsRegionServerWrapperImpl.RegionServerMetricsWrapperRunnable metricsServer
90+
= metricsWrapper.new RegionServerMetricsWrapperRunnable();
91+
metricsServer.run();
92+
UTIL.loadRandomRows(table, FAMILY.getBytes(), 1, 2000);
93+
Thread.sleep(METRICS_PERIOD);
94+
metricsServer.run();
95+
admin.disableTable(TABLENAME);
96+
Thread.sleep(METRICS_PERIOD);
97+
metricsServer.run();
98+
Assert.assertTrue(metricsWrapper.getRequestsPerSecond() > -1);
99+
}
100+
}

0 commit comments

Comments
 (0)