Make DataNodePeerMetrics#minOutlierDetectionSamples configurable (#1314). Contributed by Lisheng Sun.
Signed-off-by: sunlisheng <sunlisheng@xiaomi.com>
This commit is contained in:
parent
dc0acceabb
commit
915271245b
@ -579,6 +579,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
|||||||
public static final String DFS_DATANODE_PEER_STATS_ENABLED_KEY =
|
public static final String DFS_DATANODE_PEER_STATS_ENABLED_KEY =
|
||||||
"dfs.datanode.peer.stats.enabled";
|
"dfs.datanode.peer.stats.enabled";
|
||||||
public static final boolean DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT = false;
|
public static final boolean DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT = false;
|
||||||
|
public static final String
|
||||||
|
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY =
|
||||||
|
"dfs.datanode.peer.metrics.min.outlier.detection.samples";
|
||||||
|
public static final long
|
||||||
|
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT =
|
||||||
|
1000;
|
||||||
public static final String DFS_DATANODE_HOST_NAME_KEY =
|
public static final String DFS_DATANODE_HOST_NAME_KEY =
|
||||||
HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY;
|
HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY;
|
||||||
public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY =
|
public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY =
|
||||||
|
@ -1448,7 +1448,7 @@ void startDataNode(List<StorageLocation> dataDirectories,
|
|||||||
|
|
||||||
metrics = DataNodeMetrics.create(getConf(), getDisplayName());
|
metrics = DataNodeMetrics.create(getConf(), getDisplayName());
|
||||||
peerMetrics = dnConf.peerStatsEnabled ?
|
peerMetrics = dnConf.peerStatsEnabled ?
|
||||||
DataNodePeerMetrics.create(getDisplayName()) : null;
|
DataNodePeerMetrics.create(getDisplayName(), getConf()) : null;
|
||||||
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
|
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
|
||||||
|
|
||||||
ecWorker = new ErasureCodingWorker(getConf(), this);
|
ecWorker = new ErasureCodingWorker(getConf(), this);
|
||||||
|
@ -19,9 +19,9 @@
|
|||||||
package org.apache.hadoop.hdfs.server.datanode.metrics;
|
package org.apache.hadoop.hdfs.server.datanode.metrics;
|
||||||
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.metrics2.MetricsJsonBuilder;
|
import org.apache.hadoop.metrics2.MetricsJsonBuilder;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableRollingAverages;
|
import org.apache.hadoop.metrics2.lib.MutableRollingAverages;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -30,6 +30,9 @@
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ThreadLocalRandom;
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT;
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class maintains DataNode peer metrics (e.g. numOps, AvgTime, etc.) for
|
* This class maintains DataNode peer metrics (e.g. numOps, AvgTime, etc.) for
|
||||||
* various peer operations.
|
* various peer operations.
|
||||||
@ -58,11 +61,13 @@ public class DataNodePeerMetrics {
|
|||||||
* for outlier detection. If the number of samples is below this then
|
* for outlier detection. If the number of samples is below this then
|
||||||
* outlier detection is skipped.
|
* outlier detection is skipped.
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
private final long minOutlierDetectionSamples;
|
||||||
static final long MIN_OUTLIER_DETECTION_SAMPLES = 1000;
|
|
||||||
|
|
||||||
public DataNodePeerMetrics(final String name) {
|
public DataNodePeerMetrics(final String name, Configuration conf) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
minOutlierDetectionSamples = conf.getLong(
|
||||||
|
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY,
|
||||||
|
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT);
|
||||||
this.slowNodeDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_NODES,
|
this.slowNodeDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_NODES,
|
||||||
LOW_THRESHOLD_MS);
|
LOW_THRESHOLD_MS);
|
||||||
sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time");
|
sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time");
|
||||||
@ -72,15 +77,19 @@ public String name() {
|
|||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long getMinOutlierDetectionSamples() {
|
||||||
|
return minOutlierDetectionSamples;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an instance of DataNodePeerMetrics, used for registration.
|
* Creates an instance of DataNodePeerMetrics, used for registration.
|
||||||
*/
|
*/
|
||||||
public static DataNodePeerMetrics create(String dnName) {
|
public static DataNodePeerMetrics create(String dnName, Configuration conf) {
|
||||||
final String name = "DataNodePeerActivity-" + (dnName.isEmpty()
|
final String name = "DataNodePeerActivity-" + (dnName.isEmpty()
|
||||||
? "UndefinedDataNodeName" + ThreadLocalRandom.current().nextInt()
|
? "UndefinedDataNodeName" + ThreadLocalRandom.current().nextInt()
|
||||||
: dnName.replace(':', '-'));
|
: dnName.replace(':', '-'));
|
||||||
|
|
||||||
return new DataNodePeerMetrics(name);
|
return new DataNodePeerMetrics(name, conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -122,7 +131,7 @@ public Map<String, Double> getOutliers() {
|
|||||||
// The metric name is the datanode ID.
|
// The metric name is the datanode ID.
|
||||||
final Map<String, Double> stats =
|
final Map<String, Double> stats =
|
||||||
sendPacketDownstreamRollingAverages.getStats(
|
sendPacketDownstreamRollingAverages.getStats(
|
||||||
MIN_OUTLIER_DETECTION_SAMPLES);
|
minOutlierDetectionSamples);
|
||||||
LOG.trace("DataNodePeerMetrics: Got stats: {}", stats);
|
LOG.trace("DataNodePeerMetrics: Got stats: {}", stats);
|
||||||
|
|
||||||
return slowNodeDetector.getOutliers(stats);
|
return slowNodeDetector.getOutliers(stats);
|
||||||
|
@ -2182,6 +2182,15 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.peer.metrics.min.outlier.detection.samples</name>
|
||||||
|
<value>1000</value>
|
||||||
|
<description>
|
||||||
|
Minimum number of packet send samples which are required to qualify for outlier detection.
|
||||||
|
If the number of samples is below this then outlier detection is skipped.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.datanode.outliers.report.interval</name>
|
<name>dfs.datanode.outliers.report.interval</name>
|
||||||
<value>30m</value>
|
<value>30m</value>
|
||||||
|
@ -46,7 +46,7 @@ public void testGetSendPacketDownstreamAvgInfo() throws Exception {
|
|||||||
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, true);
|
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY, true);
|
||||||
|
|
||||||
final DataNodePeerMetrics peerMetrics = DataNodePeerMetrics.create(
|
final DataNodePeerMetrics peerMetrics = DataNodePeerMetrics.create(
|
||||||
"Sample-DataNode");
|
"Sample-DataNode", conf);
|
||||||
MetricsTestHelper.replaceRollingAveragesScheduler(
|
MetricsTestHelper.replaceRollingAveragesScheduler(
|
||||||
peerMetrics.getSendPacketDownstreamRollingAverages(),
|
peerMetrics.getSendPacketDownstreamRollingAverages(),
|
||||||
numWindows, windowSize, TimeUnit.SECONDS);
|
numWindows, windowSize, TimeUnit.SECONDS);
|
||||||
|
@ -19,6 +19,8 @@
|
|||||||
package org.apache.hadoop.hdfs.server.datanode.metrics;
|
package org.apache.hadoop.hdfs.server.datanode.metrics;
|
||||||
|
|
||||||
import com.google.common.base.Supplier;
|
import com.google.common.base.Supplier;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
import org.apache.hadoop.metrics2.lib.MetricsTestHelper;
|
import org.apache.hadoop.metrics2.lib.MetricsTestHelper;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
@ -61,10 +63,13 @@ public class TestDataNodeOutlierDetectionViaMetrics {
|
|||||||
|
|
||||||
private Random random = new Random(System.currentTimeMillis());
|
private Random random = new Random(System.currentTimeMillis());
|
||||||
|
|
||||||
|
private Configuration conf;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() {
|
public void setup() {
|
||||||
GenericTestUtils.setLogLevel(DataNodePeerMetrics.LOG, Level.ALL);
|
GenericTestUtils.setLogLevel(DataNodePeerMetrics.LOG, Level.ALL);
|
||||||
GenericTestUtils.setLogLevel(OutlierDetector.LOG, Level.ALL);
|
GenericTestUtils.setLogLevel(OutlierDetector.LOG, Level.ALL);
|
||||||
|
conf = new HdfsConfiguration();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -75,7 +80,7 @@ public void testOutlierIsDetected() throws Exception {
|
|||||||
final String slowNodeName = "SlowNode";
|
final String slowNodeName = "SlowNode";
|
||||||
|
|
||||||
DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics(
|
DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics(
|
||||||
"PeerMetrics-For-Test");
|
"PeerMetrics-For-Test", conf);
|
||||||
|
|
||||||
MetricsTestHelper.replaceRollingAveragesScheduler(
|
MetricsTestHelper.replaceRollingAveragesScheduler(
|
||||||
peerMetrics.getSendPacketDownstreamRollingAverages(),
|
peerMetrics.getSendPacketDownstreamRollingAverages(),
|
||||||
@ -107,7 +112,7 @@ public Boolean get() {
|
|||||||
@Test
|
@Test
|
||||||
public void testWithNoOutliers() throws Exception {
|
public void testWithNoOutliers() throws Exception {
|
||||||
DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics(
|
DataNodePeerMetrics peerMetrics = new DataNodePeerMetrics(
|
||||||
"PeerMetrics-For-Test");
|
"PeerMetrics-For-Test", conf);
|
||||||
|
|
||||||
MetricsTestHelper.replaceRollingAveragesScheduler(
|
MetricsTestHelper.replaceRollingAveragesScheduler(
|
||||||
peerMetrics.getSendPacketDownstreamRollingAverages(),
|
peerMetrics.getSendPacketDownstreamRollingAverages(),
|
||||||
@ -134,7 +139,7 @@ public void injectFastNodesSamples(DataNodePeerMetrics peerMetrics) {
|
|||||||
final String nodeName = "FastNode-" + nodeIndex;
|
final String nodeName = "FastNode-" + nodeIndex;
|
||||||
LOG.info("Generating stats for node {}", nodeName);
|
LOG.info("Generating stats for node {}", nodeName);
|
||||||
for (int i = 0;
|
for (int i = 0;
|
||||||
i < 2 * DataNodePeerMetrics.MIN_OUTLIER_DETECTION_SAMPLES;
|
i < 2 * peerMetrics.getMinOutlierDetectionSamples();
|
||||||
++i) {
|
++i) {
|
||||||
peerMetrics.addSendPacketDownstream(
|
peerMetrics.addSendPacketDownstream(
|
||||||
nodeName, random.nextInt(FAST_NODE_MAX_LATENCY_MS));
|
nodeName, random.nextInt(FAST_NODE_MAX_LATENCY_MS));
|
||||||
@ -151,7 +156,7 @@ public void injectSlowNodeSamples(
|
|||||||
|
|
||||||
// And the one slow node.
|
// And the one slow node.
|
||||||
for (int i = 0;
|
for (int i = 0;
|
||||||
i < 2 * DataNodePeerMetrics.MIN_OUTLIER_DETECTION_SAMPLES;
|
i < 2 * peerMetrics.getMinOutlierDetectionSamples();
|
||||||
++i) {
|
++i) {
|
||||||
peerMetrics.addSendPacketDownstream(
|
peerMetrics.addSendPacketDownstream(
|
||||||
slowNodeName, SLOW_NODE_LATENCY_MS);
|
slowNodeName, SLOW_NODE_LATENCY_MS);
|
||||||
|
Loading…
Reference in New Issue
Block a user