Skip to content

Commit 0ca5686

Browse files
rdingankar (Ravindra Dingankar) and co-authors authored
HDFS-16917 Add transfer rate quantile metrics for DataNode reads (#5397)
Co-authored-by: Ravindra Dingankar <rdingankar@linkedin.com>
1 parent 61f369c commit 0ca5686

File tree

6 files changed

+55
-0
lines changed

6 files changed

+55
-0
lines changed

hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,9 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
370370
|:---- |:---- |
371371
| `BytesWritten` | Total number of bytes written to DataNode |
372372
| `BytesRead` | Total number of bytes read from DataNode |
373+
| `ReadTransferRateNumOps` | Total number of data read transfers |
374+
| `ReadTransferRateAvgTime` | Average transfer rate of bytes read from DataNode, measured in bytes per second. |
375+
| `ReadTransferRate`*num*`s(50/75/90/95/99)thPercentileRate` | The 50/75/90/95/99th percentile of the transfer rate of bytes read from DataNode, measured in bytes per second. |
373376
| `BlocksWritten` | Total number of blocks written to DataNode |
374377
| `BlocksRead` | Total number of blocks read from DataNode |
375378
| `BlocksReplicated` | Total number of blocks replicated |

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.apache.commons.lang3.StringUtils;
7070
import org.apache.hadoop.fs.ParentNotDirectoryException;
7171
import org.apache.hadoop.fs.UnresolvedLinkException;
72+
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
7273
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
7374
import org.apache.hadoop.hdfs.server.namenode.INodesInPath;
7475
import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -1936,4 +1937,18 @@ public static boolean isParentEntry(final String path, final String parent) {
19361937
return path.charAt(parent.length()) == Path.SEPARATOR_CHAR
19371938
|| parent.equals(Path.SEPARATOR);
19381939
}
1940+
1941+
/**
1942+
* Add transfer rate metrics for valid data read and duration values.
1943+
* @param metrics metrics for datanodes
1944+
* @param read bytes read
1945+
* @param duration read duration
1946+
*/
1947+
public static void addTransferRateMetric(final DataNodeMetrics metrics, final long read, final long duration) {
1948+
if (read >= 0 && duration > 0) {
1949+
metrics.addReadTransferRate(read * 1000 / duration);
1950+
} else {
1951+
LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration);
1952+
}
1953+
}
19391954
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.hadoop.hdfs.server.datanode;
1919

2020
import org.apache.hadoop.classification.VisibleForTesting;
21+
import org.apache.hadoop.hdfs.DFSUtil;
2122
import org.apache.hadoop.util.Preconditions;
2223
import org.apache.hadoop.thirdparty.protobuf.ByteString;
2324
import javax.crypto.SecretKey;
@@ -632,6 +633,7 @@ public void readBlock(final ExtendedBlock block,
632633
datanode.metrics.incrBytesRead((int) read);
633634
datanode.metrics.incrBlocksRead();
634635
datanode.metrics.incrTotalReadTime(duration);
636+
DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);
635637
} catch ( SocketException ignored ) {
636638
LOG.trace("{}:Ignoring exception while serving {} to {}",
637639
dnR, block, remoteAddress, ignored);
@@ -1122,6 +1124,7 @@ public void copyBlock(final ExtendedBlock block,
11221124
datanode.metrics.incrBytesRead((int) read);
11231125
datanode.metrics.incrBlocksRead();
11241126
datanode.metrics.incrTotalReadTime(duration);
1127+
DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);
11251128

11261129
LOG.info("Copied {} to {}", block, peer.getRemoteAddressString());
11271130
} catch (IOException ioe) {

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ public class DataNodeMetrics {
6161
@Metric MutableCounterLong bytesRead;
6262
@Metric("Milliseconds spent reading")
6363
MutableCounterLong totalReadTime;
64+
@Metric private MutableRate readTransferRate;
65+
final private MutableQuantiles[] readTransferRateQuantiles;
6466
@Metric MutableCounterLong blocksWritten;
6567
@Metric MutableCounterLong blocksRead;
6668
@Metric MutableCounterLong blocksReplicated;
@@ -227,6 +229,7 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals,
227229
sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len];
228230
ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len];
229231
ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len];
232+
readTransferRateQuantiles = new MutableQuantiles[len];
230233

231234
for (int i = 0; i < len; i++) {
232235
int interval = intervals[i];
@@ -255,6 +258,10 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals,
255258
"ramDiskBlocksLazyPersistWindows" + interval + "s",
256259
"Time between the RamDisk block write and disk persist in ms",
257260
"ops", "latency", interval);
261+
readTransferRateQuantiles[i] = registry.newQuantiles(
262+
"readTransferRate" + interval + "s",
263+
"Rate at which bytes are read from datanode calculated in bytes per second",
264+
"ops", "rate", interval);
258265
}
259266
}
260267

@@ -316,6 +323,13 @@ public void addIncrementalBlockReport(long latency,
316323
}
317324
}
318325

326+
public void addReadTransferRate(long readTransferRate) {
327+
this.readTransferRate.add(readTransferRate);
328+
for (MutableQuantiles q : readTransferRateQuantiles) {
329+
q.add(readTransferRate);
330+
}
331+
}
332+
319333
public void addCacheReport(long latency) {
320334
cacheReports.add(latency);
321335
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import static org.junit.Assert.assertThat;
4646
import static org.junit.Assert.assertTrue;
4747
import static org.junit.Assert.fail;
48+
import static org.mockito.Mockito.*;
4849

4950
import java.io.File;
5051
import java.io.IOException;
@@ -71,6 +72,7 @@
7172
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
7273
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
7374
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
75+
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
7476
import org.apache.hadoop.hdfs.server.namenode.NameNode;
7577
import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
7678
import org.apache.hadoop.http.HttpConfig;
@@ -1108,4 +1110,18 @@ public void testErrorMessageForInvalidNameservice() throws Exception {
11081110
LambdaTestUtils.intercept(IOException.class, expectedErrorMessage,
11091111
()->DFSUtil.getNNServiceRpcAddressesForCluster(conf));
11101112
}
1113+
1114+
@Test
1115+
public void testAddTransferRateMetricForValidValues() {
1116+
DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class);
1117+
DFSUtil.addTransferRateMetric(mockMetrics, 100, 10);
1118+
verify(mockMetrics).addReadTransferRate(10000);
1119+
}
1120+
1121+
@Test
1122+
public void testAddTransferRateMetricForInvalidValue() {
1123+
DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class);
1124+
DFSUtil.addTransferRateMetric(mockMetrics, 100, 0);
1125+
verify(mockMetrics, times(0)).addReadTransferRate(anyLong());
1126+
}
11111127
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ public void testTimeoutMetric() throws Exception {
380380
@Test(timeout=120000)
381381
public void testDataNodeTimeSpend() throws Exception {
382382
Configuration conf = new HdfsConfiguration();
383+
conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "" + 60);
383384
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
384385
try {
385386
final FileSystem fs = cluster.getFileSystem();
@@ -391,6 +392,7 @@ public void testDataNodeTimeSpend() throws Exception {
391392

392393
final long startWriteValue = getLongCounter("TotalWriteTime", rb);
393394
final long startReadValue = getLongCounter("TotalReadTime", rb);
395+
assertCounter("ReadTransferRateNumOps", 0L, rb);
394396
final AtomicInteger x = new AtomicInteger(0);
395397

396398
// Lets Metric system update latest metrics
@@ -410,6 +412,8 @@ public Boolean get() {
410412
MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name());
411413
final long endWriteValue = getLongCounter("TotalWriteTime", rbNew);
412414
final long endReadValue = getLongCounter("TotalReadTime", rbNew);
415+
assertCounter("ReadTransferRateNumOps", 1L, rbNew);
416+
assertQuantileGauges("ReadTransferRate" + "60s", rbNew, "Rate");
413417
return endWriteValue > startWriteValue
414418
&& endReadValue > startReadValue;
415419
}

0 commit comments

Comments
 (0)