
Commit 94b3c6d

Ravindra Dingankar (rdingankar) authored and committed
HDFS-16917 Add transfer rate quantile metrics for DataNode reads (#5397)
Co-authored-by: Ravindra Dingankar <rdingankar@linkedin.com>
1 parent c25ac78 commit 94b3c6d

File tree: 6 files changed, +55 −0 lines


hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md

Lines changed: 3 additions & 0 deletions
@@ -368,6 +368,9 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
 |:---- |:---- |
 | `BytesWritten` | Total number of bytes written to DataNode |
 | `BytesRead` | Total number of bytes read from DataNode |
+| `ReadTransferRateNumOps` | Total number of data read transfers |
+| `ReadTransferRateAvgTime` | Average transfer rate of bytes read from DataNode, measured in bytes per second |
+| `ReadTransferRate`*num*`s(50/75/90/95/99)thPercentileRate` | The 50/75/90/95/99th percentile of the transfer rate of bytes read from DataNode, measured in bytes per second |
 | `BlocksWritten` | Total number of blocks written to DataNode |
 | `BlocksRead` | Total number of blocks read from DataNode |
 | `BlocksReplicated` | Total number of blocks replicated |
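
In the `ReadTransferRate` quantile row above, *num* is a configured percentile interval in seconds. Assuming a single 60-second interval is enabled via `dfs.metrics.percentiles.intervals` (the setting the updated TestDataNodeMetrics below relies on), the DataNode exposes gauges such as `ReadTransferRate60s50thPercentileRate`, `ReadTransferRate60s90thPercentileRate`, and `ReadTransferRate60s99thPercentileRate`.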

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

Lines changed: 15 additions & 0 deletions
@@ -69,6 +69,7 @@
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
+import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
 import org.apache.hadoop.hdfs.server.namenode.INodesInPath;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -1893,4 +1894,18 @@ public static boolean isParentEntry(final String path, final String parent) {
     return path.charAt(parent.length()) == Path.SEPARATOR_CHAR
         || parent.equals(Path.SEPARATOR);
   }
+
+  /**
+   * Add transfer rate metrics for valid data read and duration values.
+   * @param metrics metrics for datanodes
+   * @param read bytes read
+   * @param duration read duration in milliseconds
+   */
+  public static void addTransferRateMetric(final DataNodeMetrics metrics, final long read, final long duration) {
+    if (read >= 0 && duration > 0) {
+      metrics.addReadTransferRate(read * 1000 / duration);
+    } else {
+      LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration);
+    }
+  }
 }
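
The conversion assumes `duration` is in milliseconds, matching the `totalReadTime` counter ("Milliseconds spent reading") that the same call sites increment; multiplying by 1000 turns bytes per millisecond into bytes per second. A worked example with hypothetical values, mirroring the new unit test below:

    // Hypothetical values: 100 bytes transferred in 10 ms.
    long read = 100L;                              // bytes read
    long duration = 10L;                           // elapsed time in ms
    long bytesPerSecond = read * 1000 / duration;  // 10000 B/s

The integer division truncates toward zero, so a very small read over a long duration (for example, 1 byte over 2000 ms) records 0 B/s.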

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

Lines changed: 3 additions & 0 deletions
@@ -24,6 +24,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.fs.FsTracer;
 import org.apache.hadoop.fs.StorageType;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DFSUtilClient;
 import org.apache.hadoop.hdfs.ExtendedBlockId;
 import org.apache.hadoop.hdfs.net.Peer;
@@ -633,6 +634,7 @@ public void readBlock(final ExtendedBlock block,
       datanode.metrics.incrBytesRead((int) read);
       datanode.metrics.incrBlocksRead();
       datanode.metrics.incrTotalReadTime(duration);
+      DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);
     } catch ( SocketException ignored ) {
       LOG.trace("{}:Ignoring exception while serving {} to {}",
           dnR, block, remoteAddress, ignored);
@@ -1122,6 +1124,7 @@ public void copyBlock(final ExtendedBlock block,
       datanode.metrics.incrBytesRead((int) read);
       datanode.metrics.incrBlocksRead();
       datanode.metrics.incrTotalReadTime(duration);
+      DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);

       LOG.info("Copied {} to {}", block, peer.getRemoteAddressString());
     } catch (IOException ioe) {
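
Both call sites pass the same `read`/`duration` pair already used for `BytesRead` and `TotalReadTime`, so the new quantiles reflect end-to-end throughput of the block send, network transfer time included. A hedged sketch of the surrounding timing pattern (variable names are illustrative; `Time.monotonicNow()` from `org.apache.hadoop.util.Time` returns milliseconds):

    long begin = Time.monotonicNow();
    long read = blockSender.sendBlock(out, baseStream, null);   // bytes sent
    long duration = Time.monotonicNow() - begin;                // elapsed ms
    datanode.metrics.incrTotalReadTime(duration);
    DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);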

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java

Lines changed: 14 additions & 0 deletions
@@ -61,6 +61,8 @@ public class DataNodeMetrics {
   @Metric MutableCounterLong bytesRead;
   @Metric("Milliseconds spent reading")
   MutableCounterLong totalReadTime;
+  @Metric private MutableRate readTransferRate;
+  private final MutableQuantiles[] readTransferRateQuantiles;
   @Metric MutableCounterLong blocksWritten;
   @Metric MutableCounterLong blocksRead;
   @Metric MutableCounterLong blocksReplicated;
@@ -201,6 +203,7 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals,
     sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len];
     ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len];
     ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len];
+    readTransferRateQuantiles = new MutableQuantiles[len];

     for (int i = 0; i < len; i++) {
       int interval = intervals[i];
@@ -229,6 +232,10 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals,
           "ramDiskBlocksLazyPersistWindows" + interval + "s",
           "Time between the RamDisk block write and disk persist in ms",
           "ops", "latency", interval);
+      readTransferRateQuantiles[i] = registry.newQuantiles(
+          "readTransferRate" + interval + "s",
+          "Rate at which bytes are read from datanode calculated in bytes per second",
+          "ops", "rate", interval);
     }
   }

@@ -290,6 +297,13 @@ public void addIncrementalBlockReport(long latency,
     }
   }

+  public void addReadTransferRate(long readTransferRate) {
+    this.readTransferRate.add(readTransferRate);
+    for (MutableQuantiles q : readTransferRateQuantiles) {
+      q.add(readTransferRate);
+    }
+  }
+
   public void addCacheReport(long latency) {
     cacheReports.add(latency);
   }
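
The per-interval quantiles are created only when percentile intervals are configured (`len` is the number of configured intervals); without any, only the `readTransferRate` `MutableRate` (the NumOps/AvgTime pair) is populated. A minimal sketch of enabling a single 60-second window, as the updated TestDataNodeMetrics below does; the property behind `DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY` is `dfs.metrics.percentiles.intervals`:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class ReadTransferRateQuantilesConfig {
      // Returns a conf with one 60s rollover window; a DataNode started with
      // it registers the readTransferRate60s quantile set.
      public static Configuration create() {
        Configuration conf = new HdfsConfiguration();
        conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "60");
        return conf;
      }
    }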

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java

Lines changed: 16 additions & 0 deletions
@@ -45,6 +45,7 @@
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Mockito.*;

 import java.io.File;
 import java.io.IOException;
@@ -70,6 +71,7 @@
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
 import org.apache.hadoop.http.HttpConfig;
@@ -1109,4 +1111,18 @@ public void testErrorMessageForInvalidNameservice() throws Exception {
     LambdaTestUtils.intercept(IOException.class, expectedErrorMessage,
         ()->DFSUtil.getNNServiceRpcAddressesForCluster(conf));
   }
+
+  @Test
+  public void testAddTransferRateMetricForValidValues() {
+    DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class);
+    DFSUtil.addTransferRateMetric(mockMetrics, 100, 10);
+    verify(mockMetrics).addReadTransferRate(10000);
+  }
+
+  @Test
+  public void testAddTransferRateMetricForInvalidValue() {
+    DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class);
+    DFSUtil.addTransferRateMetric(mockMetrics, 100, 0);
+    verify(mockMetrics, times(0)).addReadTransferRate(anyLong());
+  }
 }

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java

Lines changed: 4 additions & 0 deletions
@@ -274,6 +274,7 @@ public void testTimeoutMetric() throws Exception {
   @Test(timeout=120000)
   public void testDataNodeTimeSpend() throws Exception {
     Configuration conf = new HdfsConfiguration();
+    conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "60");
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
     try {
       final FileSystem fs = cluster.getFileSystem();
@@ -285,6 +286,7 @@ public void testDataNodeTimeSpend() throws Exception {

      final long startWriteValue = getLongCounter("TotalWriteTime", rb);
      final long startReadValue = getLongCounter("TotalReadTime", rb);
+     assertCounter("ReadTransferRateNumOps", 0L, rb);
      final AtomicInteger x = new AtomicInteger(0);

      // Lets Metric system update latest metrics
@@ -304,6 +306,8 @@ public Boolean get() {
        MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name());
        final long endWriteValue = getLongCounter("TotalWriteTime", rbNew);
        final long endReadValue = getLongCounter("TotalReadTime", rbNew);
+       assertCounter("ReadTransferRateNumOps", 1L, rbNew);
+       assertQuantileGauges("ReadTransferRate60s", rbNew, "Rate");
        return endWriteValue > startWriteValue
            && endReadValue > startReadValue;
      }
