Skip to content

HDFS-16949 Update ReadTransferRate to ReadLatencyPerGB for effectiv… #5479

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,9 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
|:---- |:---- |
| `BytesWritten` | Total number of bytes written to DataNode |
| `BytesRead` | Total number of bytes read from DataNode |
| `ReadTransferRateNumOps` | Total number of data read transfers |
| `ReadTransferRateAvgTime` | Average transfer rate of bytes read from DataNode, measured in bytes per second. |
| `ReadTransferRate`*num*`s(50/75/90/95/99)thPercentileRate` | The 50/75/90/95/99th percentile of the transfer rate of bytes read from DataNode, measured in bytes per second. |
| `ReadLatencyPerGBNumOps` | Total number of data reads |
| `ReadLatencyPerGBAvgTime` | Average time taken to read 1 GB data from DataNode, measured in seconds per GB. |
| `ReadLatencyPerGB`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of the time taken to read 1 GB data from DataNode, measured in seconds per GB. |
| `BlocksWritten` | Total number of blocks written to DataNode |
| `BlocksRead` | Total number of blocks read from DataNode |
| `BlocksReplicated` | Total number of blocks replicated |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1939,16 +1939,17 @@ public static boolean isParentEntry(final String path, final String parent) {
}

/**
* Add transfer rate metrics for valid data read and duration values.
* Add time taken in seconds to read a GB of data metric for valid data read and duration values.
* @param metrics metrics for datanodes
* @param read bytes read
* @param duration read duration
* @param duration read duration in milliseconds
*/
public static void addTransferRateMetric(final DataNodeMetrics metrics, final long read, final long duration) {
if (read >= 0 && duration > 0) {
metrics.addReadTransferRate(read * 1000 / duration);
public static void addReadLatencyPerGBMetric(final DataNodeMetrics metrics,
final long read, final long duration) {
if (read > 0 && duration >= 0) {
metrics.addReadLatencyPerGB(duration * 1000_000 / read);
} else {
LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration);
LOG.warn("Unexpected value for data read bytes={} duration={}", read, duration);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ public void readBlock(final ExtendedBlock block,
datanode.metrics.incrBytesRead((int) read);
datanode.metrics.incrBlocksRead();
datanode.metrics.incrTotalReadTime(duration);
DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);
DFSUtil.addReadLatencyPerGBMetric(datanode.metrics, read, duration);
} catch ( SocketException ignored ) {
LOG.trace("{}:Ignoring exception while serving {} to {}",
dnR, block, remoteAddress, ignored);
Expand Down Expand Up @@ -1124,7 +1124,7 @@ public void copyBlock(final ExtendedBlock block,
datanode.metrics.incrBytesRead((int) read);
datanode.metrics.incrBlocksRead();
datanode.metrics.incrTotalReadTime(duration);
DFSUtil.addTransferRateMetric(datanode.metrics, read, duration);
DFSUtil.addReadLatencyPerGBMetric(datanode.metrics, read, duration);

LOG.info("Copied {} to {}", block, peer.getRemoteAddressString());
} catch (IOException ioe) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ public class DataNodeMetrics {
@Metric MutableCounterLong bytesRead;
@Metric("Milliseconds spent reading")
MutableCounterLong totalReadTime;
@Metric private MutableRate readTransferRate;
final private MutableQuantiles[] readTransferRateQuantiles;
@Metric private MutableRate readLatencyPerGB;
final private MutableQuantiles[] readLatencyPerGBQuantiles;
@Metric MutableCounterLong blocksWritten;
@Metric MutableCounterLong blocksRead;
@Metric MutableCounterLong blocksReplicated;
Expand Down Expand Up @@ -229,7 +229,7 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals,
sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len];
ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len];
ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len];
readTransferRateQuantiles = new MutableQuantiles[len];
readLatencyPerGBQuantiles = new MutableQuantiles[len];

for (int i = 0; i < len; i++) {
int interval = intervals[i];
Expand Down Expand Up @@ -258,10 +258,10 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals,
"ramDiskBlocksLazyPersistWindows" + interval + "s",
"Time between the RamDisk block write and disk persist in ms",
"ops", "latency", interval);
readTransferRateQuantiles[i] = registry.newQuantiles(
"readTransferRate" + interval + "s",
"Rate at which bytes are read from datanode calculated in bytes per second",
"ops", "rate", interval);
readLatencyPerGBQuantiles[i] = registry.newQuantiles(
"readLatencyPerGB" + interval + "s",
"Time taken in seconds for reading data from datanode per GB",
"ops", "latency", interval);
}
}

Expand Down Expand Up @@ -323,10 +323,10 @@ public void addIncrementalBlockReport(long latency,
}
}

public void addReadTransferRate(long readTransferRate) {
this.readTransferRate.add(readTransferRate);
for (MutableQuantiles q : readTransferRateQuantiles) {
q.add(readTransferRate);
public void addReadLatencyPerGB(long readLatencyPerGB) {
this.readLatencyPerGB.add(readLatencyPerGB);
for (MutableQuantiles q : readLatencyPerGBQuantiles) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we modify these quantiles rather than the metric?

I think we have the metric we want, but we want the lowest vs highest percentile (ts clear the current percentile is lower=better).

q.add(readLatencyPerGB);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1112,16 +1112,16 @@ public void testErrorMessageForInvalidNameservice() throws Exception {
}

@Test
public void testAddTransferRateMetricForValidValues() {
public void testAddReadLatencyPerGBMetricForValidValues() {
DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class);
DFSUtil.addTransferRateMetric(mockMetrics, 100, 10);
verify(mockMetrics).addReadTransferRate(10000);
DFSUtil.addReadLatencyPerGBMetric(mockMetrics, 3333333333L, 99999L);
verify(mockMetrics).addReadLatencyPerGB(29L);
}

@Test
public void testAddTransferRateMetricForInvalidValue() {
public void testAddReadLatencyPerGBMetricForInvalidValue() {
DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class);
DFSUtil.addTransferRateMetric(mockMetrics, 100, 0);
verify(mockMetrics, times(0)).addReadTransferRate(anyLong());
DFSUtil.addReadLatencyPerGBMetric(mockMetrics, 0, 1000);
verify(mockMetrics, times(0)).addReadLatencyPerGB(anyLong());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ public void testDataNodeTimeSpend() throws Exception {

final long startWriteValue = getLongCounter("TotalWriteTime", rb);
final long startReadValue = getLongCounter("TotalReadTime", rb);
assertCounter("ReadTransferRateNumOps", 0L, rb);
assertCounter("ReadLatencyPerGBNumOps", 0L, rb);
final AtomicInteger x = new AtomicInteger(0);

// Lets Metric system update latest metrics
Expand All @@ -412,8 +412,8 @@ public Boolean get() {
MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name());
final long endWriteValue = getLongCounter("TotalWriteTime", rbNew);
final long endReadValue = getLongCounter("TotalReadTime", rbNew);
assertCounter("ReadTransferRateNumOps", 1L, rbNew);
assertQuantileGauges("ReadTransferRate" + "60s", rbNew, "Rate");
assertCounter("ReadLatencyPerGBNumOps", 1L, rbNew);
assertQuantileGauges("ReadLatencyPerGB" + "60s", rbNew);
return endWriteValue > startWriteValue
&& endReadValue > startReadValue;
}
Expand Down