Skip to content

Commit e8cbc3f

Browse files
zhuyaogai and jay.zhu authored
HBASE-27853 Add client side table metrics for rpc calls and request latency. (#5406)
Co-authored-by: jay.zhu <jay.zhu@huolala.cn>
Signed-off-by: Bryan Beaudreault <bbeaudreault@apache.org>
1 parent 1df357e commit e8cbc3f

14 files changed

+406
-74
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncBatchRpcRetryingCaller.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -395,7 +395,7 @@ private void sendToServer(ServerName serverName, ServerRequest serverReq, int tr
395395
}
396396
HBaseRpcController controller = conn.rpcControllerFactory.newController();
397397
resetController(controller, Math.min(rpcTimeoutNs, remainingNs),
398-
calcPriority(serverReq.getPriority(), tableName));
398+
calcPriority(serverReq.getPriority(), tableName), tableName);
399399
controller.setRequestAttributes(requestAttributes);
400400
if (!cells.isEmpty()) {
401401
controller.setCellScanner(createCellScanner(cells));

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -139,8 +139,8 @@ public AsyncConnectionImpl(Configuration conf, ConnectionRegistry registry, Stri
139139
this.connConf = new AsyncConnectionConfiguration(conf);
140140
this.registry = registry;
141141
if (conf.getBoolean(CLIENT_SIDE_METRICS_ENABLED_KEY, false)) {
142-
this.metrics =
143-
Optional.of(MetricsConnection.getMetricsConnection(metricsScope, () -> null, () -> null));
142+
this.metrics = Optional
143+
.of(MetricsConnection.getMetricsConnection(conf, metricsScope, () -> null, () -> null));
144144
} else {
145145
this.metrics = Optional.empty();
146146
}

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRpcRetryingCaller.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ protected final void resetCallTimeout() {
121121
} else {
122122
callTimeoutNs = rpcTimeoutNs;
123123
}
124-
resetController(controller, callTimeoutNs, priority);
124+
resetController(controller, callTimeoutNs, priority, getTableName().orElse(null));
125125
}
126126

127127
private void tryScheduleRetry(Throwable error) {

hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncScanSingleRegionRpcRetryingCaller.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -354,7 +354,7 @@ private long elapsedMs() {
354354

355355
private void closeScanner() {
356356
incRPCCallsMetrics(scanMetrics, regionServerRemote);
357-
resetController(controller, rpcTimeoutNs, HConstants.HIGH_QOS);
357+
resetController(controller, rpcTimeoutNs, HConstants.HIGH_QOS, loc.getRegion().getTable());
358358
ScanRequest req = RequestConverter.buildScanRequest(this.scannerId, 0, true, false);
359359
stub.scan(controller, req, resp -> {
360360
if (controller.failed()) {
@@ -573,7 +573,7 @@ private void call() {
573573
if (tries > 1) {
574574
incRPCRetriesMetrics(scanMetrics, regionServerRemote);
575575
}
576-
resetController(controller, callTimeoutNs, priority);
576+
resetController(controller, callTimeoutNs, priority, loc.getRegion().getTable());
577577
ScanRequest req = RequestConverter.buildScanRequest(scannerId, scan.getCaching(), false,
578578
nextCallSeq, scan.isScanMetricsEnabled(), false, scan.getLimit());
579579
final Context context = Context.current();
@@ -595,7 +595,7 @@ private void next() {
595595
private void renewLease() {
596596
incRPCCallsMetrics(scanMetrics, regionServerRemote);
597597
nextCallSeq++;
598-
resetController(controller, rpcTimeoutNs, priority);
598+
resetController(controller, rpcTimeoutNs, priority, loc.getRegion().getTable());
599599
ScanRequest req =
600600
RequestConverter.buildScanRequest(scannerId, 0, false, nextCallSeq, false, true, -1);
601601
stub.scan(controller, req, resp -> {

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -359,8 +359,8 @@ public class ConnectionImplementation implements ClusterConnection, Closeable {
359359

360360
if (conf.getBoolean(CLIENT_SIDE_METRICS_ENABLED_KEY, false)) {
361361
this.metricsScope = MetricsConnection.getScope(conf, clusterId, this);
362-
this.metrics = MetricsConnection.getMetricsConnection(this.metricsScope, this::getBatchPool,
363-
this::getMetaLookupPool);
362+
this.metrics = MetricsConnection.getMetricsConnection(conf, this.metricsScope,
363+
this::getBatchPool, this::getMetaLookupPool);
364364
} else {
365365
this.metrics = null;
366366
}

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,13 +223,17 @@ static boolean isEmptyStopRow(byte[] row) {
223223
return Bytes.equals(row, EMPTY_END_ROW);
224224
}
225225

226-
static void resetController(HBaseRpcController controller, long timeoutNs, int priority) {
226+
static void resetController(HBaseRpcController controller, long timeoutNs, int priority,
227+
TableName tableName) {
227228
controller.reset();
228229
if (timeoutNs >= 0) {
229230
controller.setCallTimeout(
230231
(int) Math.min(Integer.MAX_VALUE, TimeUnit.NANOSECONDS.toMillis(timeoutNs)));
231232
}
232233
controller.setPriority(priority);
234+
if (tableName != null) {
235+
controller.setTableName(tableName);
236+
}
233237
}
234238

235239
static Throwable translateException(Throwable t) {

hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java

Lines changed: 57 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,10 @@
3434
import java.util.concurrent.ThreadPoolExecutor;
3535
import java.util.concurrent.TimeUnit;
3636
import java.util.function.Supplier;
37+
import org.apache.commons.lang3.StringUtils;
3738
import org.apache.hadoop.conf.Configuration;
3839
import org.apache.hadoop.hbase.ServerName;
40+
import org.apache.hadoop.hbase.TableName;
3941
import org.apache.hadoop.hbase.util.Bytes;
4042
import org.apache.hadoop.ipc.RemoteException;
4143
import org.apache.yetus.audience.InterfaceAudience;
@@ -51,22 +53,22 @@
5153
* This class is for maintaining the various connection statistics and publishing them through the
5254
* metrics interfaces. This class manages its own {@link MetricRegistry} and {@link JmxReporter} so
5355
* as to not conflict with other uses of Yammer Metrics within the client application. Calling
54-
* {@link #getMetricsConnection(String, Supplier, Supplier)} implicitly creates and "starts"
55-
* instances of these classes; be sure to call {@link #deleteMetricsConnection(String)} to terminate
56-
* the thread pools they allocate. The metrics reporter will be shutdown {@link #shutdown()} when
57-
* all connections within this metrics instances are closed.
56+
* {@link #getMetricsConnection(Configuration, String, Supplier, Supplier)} implicitly creates and
57+
* "starts" instances of these classes; be sure to call {@link #deleteMetricsConnection(String)} to
58+
* terminate the thread pools they allocate. The metrics reporter will be shutdown
59+
* {@link #shutdown()} when all connections within this metrics instances are closed.
5860
*/
5961
@InterfaceAudience.Private
6062
public final class MetricsConnection implements StatisticTrackable {
6163

6264
private static final ConcurrentMap<String, MetricsConnection> METRICS_INSTANCES =
6365
new ConcurrentHashMap<>();
6466

65-
static MetricsConnection getMetricsConnection(final String scope,
67+
static MetricsConnection getMetricsConnection(final Configuration conf, final String scope,
6668
Supplier<ThreadPoolExecutor> batchPool, Supplier<ThreadPoolExecutor> metaPool) {
6769
return METRICS_INSTANCES.compute(scope, (s, metricsConnection) -> {
6870
if (metricsConnection == null) {
69-
MetricsConnection newMetricsConn = new MetricsConnection(scope, batchPool, metaPool);
71+
MetricsConnection newMetricsConn = new MetricsConnection(conf, scope, batchPool, metaPool);
7072
newMetricsConn.incrConnectionCount();
7173
return newMetricsConn;
7274
} else {
@@ -91,6 +93,10 @@ static void deleteMetricsConnection(final String scope) {
9193
/** Set this key to {@code true} to enable metrics collection of client requests. */
9294
public static final String CLIENT_SIDE_METRICS_ENABLED_KEY = "hbase.client.metrics.enable";
9395

96+
/** Set this key to {@code true} to enable table metrics collection of client requests. */
97+
public static final String CLIENT_SIDE_TABLE_METRICS_ENABLED_KEY =
98+
"hbase.client.table.metrics.enable";
99+
94100
/**
95101
* Set to specify a custom scope for the metrics published through {@link MetricsConnection}. The
96102
* scope is added to JMX MBean objectName, and defaults to a combination of the Connection's
@@ -311,6 +317,7 @@ private static interface NewMetric<T> {
311317
private final MetricRegistry registry;
312318
private final JmxReporter reporter;
313319
private final String scope;
320+
private final boolean tableMetricsEnabled;
314321

315322
private final NewMetric<Timer> timerFactory = new NewMetric<Timer>() {
316323
@Override
@@ -378,9 +385,10 @@ public Counter newMetric(Class<?> clazz, String name, String scope) {
378385
private final ConcurrentMap<String, Counter> rpcCounters =
379386
new ConcurrentHashMap<>(CAPACITY, LOAD_FACTOR, CONCURRENCY_LEVEL);
380387

381-
private MetricsConnection(String scope, Supplier<ThreadPoolExecutor> batchPool,
382-
Supplier<ThreadPoolExecutor> metaPool) {
388+
private MetricsConnection(Configuration conf, String scope,
389+
Supplier<ThreadPoolExecutor> batchPool, Supplier<ThreadPoolExecutor> metaPool) {
383390
this.scope = scope;
391+
this.tableMetricsEnabled = conf.getBoolean(CLIENT_SIDE_TABLE_METRICS_ENABLED_KEY, false);
384392
addThreadPools(batchPool, metaPool);
385393
this.registry = new MetricRegistry();
386394
this.registry.register(getExecutorPoolName(), new RatioGauge() {
@@ -520,6 +528,16 @@ public ConcurrentMap<String, Counter> getRpcCounters() {
520528
return rpcCounters;
521529
}
522530

531+
/** rpcTimers metric */
532+
public ConcurrentMap<String, Timer> getRpcTimers() {
533+
return rpcTimers;
534+
}
535+
536+
/** rpcHistograms metric */
537+
public ConcurrentMap<String, Histogram> getRpcHistograms() {
538+
return rpcHistograms;
539+
}
540+
523541
/** getTracker metric */
524542
public CallTracker getGetTracker() {
525543
return getTracker;
@@ -694,7 +712,8 @@ private void shutdown() {
694712
}
695713

696714
/** Report RPC context to metrics system. */
697-
public void updateRpc(MethodDescriptor method, Message param, CallStats stats, Throwable e) {
715+
public void updateRpc(MethodDescriptor method, TableName tableName, Message param,
716+
CallStats stats, Throwable e) {
698717
int callsPerServer = stats.getConcurrentCallsPerServer();
699718
if (callsPerServer > 0) {
700719
concurrentCallsPerServerHist.update(callsPerServer);
@@ -744,29 +763,33 @@ public void updateRpc(MethodDescriptor method, Message param, CallStats stats, T
744763
case 0:
745764
assert "Get".equals(method.getName());
746765
getTracker.updateRpc(stats);
766+
updateTableMetric(methodName.toString(), tableName, stats, e);
747767
return;
748768
case 1:
749769
assert "Mutate".equals(method.getName());
750770
final MutationType mutationType = ((MutateRequest) param).getMutation().getMutateType();
751771
switch (mutationType) {
752772
case APPEND:
753773
appendTracker.updateRpc(stats);
754-
return;
774+
break;
755775
case DELETE:
756776
deleteTracker.updateRpc(stats);
757-
return;
777+
break;
758778
case INCREMENT:
759779
incrementTracker.updateRpc(stats);
760-
return;
780+
break;
761781
case PUT:
762782
putTracker.updateRpc(stats);
763-
return;
783+
break;
764784
default:
765785
throw new RuntimeException("Unrecognized mutation type " + mutationType);
766786
}
787+
updateTableMetric(methodName.toString(), tableName, stats, e);
788+
return;
767789
case 2:
768790
assert "Scan".equals(method.getName());
769791
scanTracker.updateRpc(stats);
792+
updateTableMetric(methodName.toString(), tableName, stats, e);
770793
return;
771794
case 3:
772795
assert "BulkLoadHFile".equals(method.getName());
@@ -792,6 +815,7 @@ public void updateRpc(MethodDescriptor method, Message param, CallStats stats, T
792815
assert "Multi".equals(method.getName());
793816
numActionsPerServerHist.update(stats.getNumActionsPerServer());
794817
multiTracker.updateRpc(stats);
818+
updateTableMetric(methodName.toString(), tableName, stats, e);
795819
return;
796820
default:
797821
throw new RuntimeException("Unrecognized ClientService RPC type " + method.getFullName());
@@ -801,6 +825,26 @@ public void updateRpc(MethodDescriptor method, Message param, CallStats stats, T
801825
updateRpcGeneric(methodName.toString(), stats);
802826
}
803827

828+
/** Report table rpc context to metrics system. */
829+
private void updateTableMetric(String methodName, TableName tableName, CallStats stats,
830+
Throwable e) {
831+
if (tableMetricsEnabled) {
832+
if (methodName != null) {
833+
String table = tableName != null && StringUtils.isNotEmpty(tableName.getNameAsString())
834+
? tableName.getNameAsString()
835+
: "unknown";
836+
String metricKey = methodName + "_" + table;
837+
// update table rpc context to metrics system,
838+
// includes rpc call duration, rpc call request/response size(bytes).
839+
updateRpcGeneric(metricKey, stats);
840+
if (e != null) {
841+
// rpc failure call counter with table name.
842+
getMetric(FAILURE_CNT_BASE + metricKey, rpcCounters, counterFactory).inc();
843+
}
844+
}
845+
}
846+
}
847+
804848
public void incrCacheDroppingExceptions(Object exception) {
805849
getMetric(
806850
CACHE_BASE + (exception == null ? UNKNOWN_EXCEPTION : exception.getClass().getSimpleName()),

hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionServerCallable.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,9 @@ public T call(int callTimeout) throws IOException {
123123
hrc.setPriority(priority);
124124
hrc.setCallTimeout(callTimeout);
125125
hrc.setRequestAttributes(requestAttributes);
126+
if (tableName != null) {
127+
hrc.setTableName(tableName);
128+
}
126129
}
127130
}
128131
return rpcCall();

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -379,7 +379,7 @@ private void onCallFinished(Call call, HBaseRpcController hrc, Address addr,
379379
RpcCallback<Message> callback) {
380380
call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime());
381381
if (metrics != null) {
382-
metrics.updateRpc(call.md, call.param, call.callStats, call.error);
382+
metrics.updateRpc(call.md, hrc.getTableName(), call.param, call.callStats, call.error);
383383
}
384384
if (LOG.isTraceEnabled()) {
385385
LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms, status: {}", call.id,

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/DelegatingHBaseRpcController.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,4 +143,14 @@ public void notifyOnCancel(RpcCallback<Object> callback, CancellationCallback ac
143143
throws IOException {
144144
delegate.notifyOnCancel(callback, action);
145145
}
146+
147+
@Override
148+
public void setTableName(TableName tableName) {
149+
delegate.setTableName(tableName);
150+
}
151+
152+
@Override
153+
public TableName getTableName() {
154+
return delegate.getTableName();
155+
}
146156
}

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseRpcController.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,4 +130,14 @@ default boolean hasRegionInfo() {
130130
default RegionInfo getRegionInfo() {
131131
return null;
132132
}
133+
134+
/** Sets Region's table name. */
135+
default void setTableName(TableName tableName) {
136+
137+
}
138+
139+
/** Returns Region's table name or null if not available or pertinent. */
140+
default TableName getTableName() {
141+
return null;
142+
}
133143
}

hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/HBaseRpcControllerImpl.java

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,8 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
5151

5252
private IOException exception;
5353

54+
private TableName tableName;
55+
5456
/**
5557
* Rpc target Region's RegionInfo we are going against. May be null.
5658
* @see #hasRegionInfo()
@@ -144,6 +146,7 @@ public void reset() {
144146
exception = null;
145147
callTimeout = null;
146148
regionInfo = null;
149+
tableName = null;
147150
// In the implementations of some callable with replicas, rpc calls are executed in a executor
148151
// and we could cancel the operation from outside which means there could be a race between
149152
// reset and startCancel. Although I think the race should be handled by the callable since the
@@ -281,4 +284,14 @@ public String toString() {
281284
+ exception + ", regionInfo=" + regionInfo + ", priority=" + priority + ", cellScanner="
282285
+ cellScanner + '}';
283286
}
287+
288+
@Override
289+
public void setTableName(TableName tableName) {
290+
this.tableName = tableName;
291+
}
292+
293+
@Override
294+
public TableName getTableName() {
295+
return tableName;
296+
}
284297
}

0 commit comments

Comments
 (0)