diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index e7d387b113..a551e3ae15 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -370,6 +370,9 @@ Each metrics record contains tags such as SessionId and Hostname as additional i |:---- |:---- | | `BytesWritten` | Total number of bytes written to DataNode | | `BytesRead` | Total number of bytes read from DataNode | +| `ReadTransferRateNumOps` | Total number of data read transfers | +| `ReadTransferRateAvgTime` | Average transfer rate of bytes read from DataNode, measured in bytes per second. | +| `ReadTransferRate`*num*`s(50/75/90/95/99)thPercentileRate` | The 50/75/90/95/99th percentile of the transfer rate of bytes read from DataNode, measured in bytes per second. | | `BlocksWritten` | Total number of blocks written to DataNode | | `BlocksRead` | Total number of blocks read from DataNode | | `BlocksReplicated` | Total number of blocks replicated | diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 7237489e7b..25726cee51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -69,6 +69,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.FSDirectory; import org.apache.hadoop.hdfs.server.namenode.INodesInPath; import org.apache.hadoop.ipc.ProtobufRpcEngine; @@ -1936,4 +1937,18 @@ public static boolean isParentEntry(final String path, final String parent) { return path.charAt(parent.length()) == Path.SEPARATOR_CHAR || parent.equals(Path.SEPARATOR); } + + /** + * Add transfer rate metrics for valid data read and duration values. + * @param metrics metrics for datanodes + * @param read bytes read + * @param duration read duration + */ + public static void addTransferRateMetric(final DataNodeMetrics metrics, final long read, final long duration) { + if (read >= 0 && duration > 0) { + metrics.addReadTransferRate(read * 1000 / duration); + } else { + LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 3cf4bde3d4..ab706fb173 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.protobuf.ByteString; import javax.crypto.SecretKey; @@ -632,6 +633,7 @@ public void readBlock(final ExtendedBlock block, datanode.metrics.incrBytesRead((int) read); datanode.metrics.incrBlocksRead(); datanode.metrics.incrTotalReadTime(duration); + DFSUtil.addTransferRateMetric(datanode.metrics, read, duration); } catch ( SocketException ignored ) { LOG.trace("{}:Ignoring exception while serving {} to {}", dnR, block, remoteAddress, ignored); @@ -1122,6 +1124,7 @@ public void copyBlock(final ExtendedBlock block, datanode.metrics.incrBytesRead((int) read); datanode.metrics.incrBlocksRead(); datanode.metrics.incrTotalReadTime(duration); + DFSUtil.addTransferRateMetric(datanode.metrics, read, duration); LOG.info("Copied {} to {}", block, peer.getRemoteAddressString()); } catch (IOException ioe) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java index 649d30e91e..675dbbff4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java @@ -61,6 +61,8 @@ public class DataNodeMetrics { @Metric MutableCounterLong bytesRead; @Metric("Milliseconds spent reading") MutableCounterLong totalReadTime; + @Metric private MutableRate readTransferRate; + final private MutableQuantiles[] readTransferRateQuantiles; @Metric MutableCounterLong blocksWritten; @Metric MutableCounterLong blocksRead; @Metric MutableCounterLong blocksReplicated; @@ -227,6 +229,7 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals, sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len]; ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len]; ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len]; + readTransferRateQuantiles = new MutableQuantiles[len]; for (int i = 0; i < len; i++) { int interval = intervals[i]; @@ -255,6 +258,10 @@ public DataNodeMetrics(String name, String sessionId, int[] intervals, "ramDiskBlocksLazyPersistWindows" + interval + "s", "Time between the RamDisk block write and disk persist in ms", "ops", "latency", interval); + readTransferRateQuantiles[i] = registry.newQuantiles( + "readTransferRate" + interval + "s", + "Rate at which bytes are read from datanode calculated in bytes per second", + "ops", "rate", interval); } } @@ -316,6 +323,13 @@ public void addIncrementalBlockReport(long latency, } } + public void addReadTransferRate(long readTransferRate) { + this.readTransferRate.add(readTransferRate); + for (MutableQuantiles q : readTransferRateQuantiles) { + q.add(readTransferRate); + } + } + public void addCacheReport(long latency) { cacheReports.add(latency); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index e6ce29316c..f8e8e4120c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -45,6 +45,7 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Mockito.*; import java.io.File; import java.io.IOException; @@ -71,6 +72,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider; import org.apache.hadoop.http.HttpConfig; @@ -1108,4 +1110,18 @@ public void testErrorMessageForInvalidNameservice() throws Exception { LambdaTestUtils.intercept(IOException.class, expectedErrorMessage, ()->DFSUtil.getNNServiceRpcAddressesForCluster(conf)); } + + @Test + public void testAddTransferRateMetricForValidValues() { + DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class); + DFSUtil.addTransferRateMetric(mockMetrics, 100, 10); + verify(mockMetrics).addReadTransferRate(10000); + } + + @Test + public void testAddTransferRateMetricForInvalidValue() { + DataNodeMetrics mockMetrics = mock(DataNodeMetrics.class); + DFSUtil.addTransferRateMetric(mockMetrics, 100, 0); + verify(mockMetrics, times(0)).addReadTransferRate(anyLong()); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java index 2bf7861287..de5c985a4f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java @@ -380,6 +380,7 @@ public void testTimeoutMetric() throws Exception { @Test(timeout=120000) public void testDataNodeTimeSpend() throws Exception { Configuration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "" + 60); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); try { final FileSystem fs = cluster.getFileSystem(); @@ -391,6 +392,7 @@ public void testDataNodeTimeSpend() throws Exception { final long startWriteValue = getLongCounter("TotalWriteTime", rb); final long startReadValue = getLongCounter("TotalReadTime", rb); + assertCounter("ReadTransferRateNumOps", 0L, rb); final AtomicInteger x = new AtomicInteger(0); // Lets Metric system update latest metrics @@ -410,6 +412,8 @@ public Boolean get() { MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name()); final long endWriteValue = getLongCounter("TotalWriteTime", rbNew); final long endReadValue = getLongCounter("TotalReadTime", rbNew); + assertCounter("ReadTransferRateNumOps", 1L, rbNew); + assertQuantileGauges("ReadTransferRate" + "60s", rbNew, "Rate"); return endWriteValue > startWriteValue && endReadValue > startReadValue; }