From 915271245b9a3f682409db380eb311ffd465b041 Mon Sep 17 00:00:00 2001 From: leosunli Date: Fri, 30 Aug 2019 09:35:26 +0800 Subject: [PATCH] Make DataNodePeerMetrics#minOutlierDetectionSamples configurable (#1314). Contributed by Lisheng Sun. Signed-off-by: sunlisheng --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 6 +++++ .../hadoop/hdfs/server/datanode/DataNode.java | 2 +- .../datanode/metrics/DataNodePeerMetrics.java | 23 +++++++++++++------ .../src/main/resources/hdfs-default.xml | 9 ++++++++ .../datanode/TestDataNodePeerMetrics.java | 2 +- ...estDataNodeOutlierDetectionViaMetrics.java | 13 +++++++---- 6 files changed, 42 insertions(+), 13 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index e6e0bae39a..b4011bb55b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -579,6 +579,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DATANODE_PEER_STATS_ENABLED_KEY = "dfs.datanode.peer.stats.enabled"; public static final boolean DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT = false; + public static final String + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY = + "dfs.datanode.peer.metrics.min.outlier.detection.samples"; + public static final long + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT = + 1000; public static final String DFS_DATANODE_HOST_NAME_KEY = HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY; public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index e620a2b688..583ac76fb5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1448,7 +1448,7 @@ void startDataNode(List dataDirectories, metrics = DataNodeMetrics.create(getConf(), getDisplayName()); peerMetrics = dnConf.peerStatsEnabled ? - DataNodePeerMetrics.create(getDisplayName()) : null; + DataNodePeerMetrics.create(getDisplayName(), getConf()) : null; metrics.getJvmMetrics().setPauseMonitor(pauseMonitor); ecWorker = new ErasureCodingWorker(getConf(), this); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java index 54b9559963..3c70a23ac5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java @@ -19,9 +19,9 @@ package org.apache.hadoop.hdfs.server.datanode.metrics; -import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.metrics2.MetricsJsonBuilder; import org.apache.hadoop.metrics2.lib.MutableRollingAverages; import org.slf4j.Logger; @@ -30,6 +30,9 @@ import java.util.Map; import java.util.concurrent.ThreadLocalRandom; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; + /** * This class maintains DataNode peer metrics (e.g. numOps, AvgTime, etc.) for * various peer operations. @@ -58,11 +61,13 @@ public class DataNodePeerMetrics { * for outlier detection. If the number of samples is below this then * outlier detection is skipped. */ - @VisibleForTesting - static final long MIN_OUTLIER_DETECTION_SAMPLES = 1000; + private final long minOutlierDetectionSamples; - public DataNodePeerMetrics(final String name) { + public DataNodePeerMetrics(final String name, Configuration conf) { this.name = name; + minOutlierDetectionSamples = conf.getLong( + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY, + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT); this.slowNodeDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_NODES, LOW_THRESHOLD_MS); sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time"); @@ -72,15 +77,19 @@ public String name() { return name; } + long getMinOutlierDetectionSamples() { + return minOutlierDetectionSamples; + } + /** * Creates an instance of DataNodePeerMetrics, used for registration. */ - public static DataNodePeerMetrics create(String dnName) { + public static DataNodePeerMetrics create(String dnName, Configuration conf) { final String name = "DataNodePeerActivity-" + (dnName.isEmpty() ? "UndefinedDataNodeName" + ThreadLocalRandom.current().nextInt() : dnName.replace(':', '-')); - return new DataNodePeerMetrics(name); + return new DataNodePeerMetrics(name, conf); } /** @@ -122,7 +131,7 @@ public Map getOutliers() { // The metric name is the datanode ID. final Map stats = sendPacketDownstreamRollingAverages.getStats( - MIN_OUTLIER_DETECTION_SAMPLES); + minOutlierDetectionSamples); LOG.trace("DataNodePeerMetrics: Got stats: {}", stats); return slowNodeDetector.getOutliers(stats); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 40583d0965..e4e7a15960 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2182,6 +2182,15 @@ + + dfs.datanode.peer.metrics.min.outlier.detection.samples + 1000 + + Minimum number of packet send samples which are required to qualify for outlier detection. + If the number of samples is below this then outlier detection is skipped. + + + dfs.datanode.outliers.report.interval 30m diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodePeerMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodePeerMetrics.java index 44601a79af..3caf24d83f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodePeerMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodePeerMetrics.java @@ -46,7 +46,7 @@ public void testGetSendPacketDownstreamAvgInfo() throws Exception { conf.setBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, true); final DataNodePeerMetrics peerMetrics = DataNodePeerMetrics.create( - "Sample-DataNode"); + "Sample-DataNode", conf); MetricsTestHelper.replaceRollingAveragesScheduler( peerMetrics.getSendPacketDownstreamRollingAverages(), numWindows, windowSize, TimeUnit.SECONDS); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/metrics/TestDataNodeOutlierDetectionViaMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/metrics/TestDataNodeOutlierDetectionViaMetrics.java index 21586af0c7..c9e3a1dfed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/metrics/TestDataNodeOutlierDetectionViaMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/metrics/TestDataNodeOutlierDetectionViaMetrics.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.datanode.metrics; import com.google.common.base.Supplier; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.metrics2.lib.MetricsTestHelper; import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; @@ -61,10 +63,13 @@ public class TestDataNodeOutlierDetectionViaMetrics { private Random random = new Random(System.currentTimeMillis()); + private Configuration conf; + @Before public void setup() { GenericTestUtils.setLogLevel(DataNodePeerMetrics.LOG, Level.ALL); GenericTestUtils.setLogLevel(OutlierDetector.LOG, Level.ALL); + conf = new HdfsConfiguration(); } /** @@ -75,7 +80,7 @@ public void testOutlierIsDetected() throws Exception { final String slowNodeName = "SlowNode"; DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics( - "PeerMetrics-For-Test"); + "PeerMetrics-For-Test", conf); MetricsTestHelper.replaceRollingAveragesScheduler( peerMetrics.getSendPacketDownstreamRollingAverages(), @@ -107,7 +112,7 @@ public Boolean get() { @Test public void testWithNoOutliers() throws Exception { DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics( - "PeerMetrics-For-Test"); + "PeerMetrics-For-Test", conf); MetricsTestHelper.replaceRollingAveragesScheduler( peerMetrics.getSendPacketDownstreamRollingAverages(), @@ -134,7 +139,7 @@ public void injectFastNodesSamples(DataNodePeerMetrics peerMetrics) { final String nodeName = "FastNode-" + nodeIndex; LOG.info("Generating stats for node {}", nodeName); for (int i = 0; - i < 2 * DataNodePeerMetrics.MIN_OUTLIER_DETECTION_SAMPLES; + i < 2 * peerMetrics.getMinOutlierDetectionSamples(); ++i) { peerMetrics.addSendPacketDownstream( nodeName, random.nextInt(FAST_NODE_MAX_LATENCY_MS)); @@ -151,7 +156,7 @@ public void injectSlowNodeSamples( // And the one slow node. for (int i = 0; - i < 2 * DataNodePeerMetrics.MIN_OUTLIER_DETECTION_SAMPLES; + i < 2 * peerMetrics.getMinOutlierDetectionSamples(); ++i) { peerMetrics.addSendPacketDownstream( slowNodeName, SLOW_NODE_LATENCY_MS);