HDFS-15745. Make DataNodePeerMetrics#LOW_THRESHOLD_MS and MIN_OUTLIER_DETECTION_NODES configurable. Contributed by Haibin Huang.

This commit is contained in:
Ayush Saxena 2021-01-01 11:37:52 +05:30
parent 617af28e80
commit 1cd96e8dd8
3 changed files with 41 additions and 7 deletions

View File

@@ -668,6 +668,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final long public static final long
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT = DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT =
1000; 1000;
public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY =
"dfs.datanode.min.outlier.detection.nodes";
public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT =
10L;
public static final String DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY =
"dfs.datanode.slowpeer.low.threshold.ms";
public static final long DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT =
5L;
public static final String DFS_DATANODE_HOST_NAME_KEY = public static final String DFS_DATANODE_HOST_NAME_KEY =
HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY; HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY;
public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY = public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY =

View File

@@ -22,6 +22,7 @@
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.metrics2.MetricsJsonBuilder; import org.apache.hadoop.metrics2.MetricsJsonBuilder;
import org.apache.hadoop.metrics2.lib.MutableRollingAverages; import org.apache.hadoop.metrics2.lib.MutableRollingAverages;
import org.slf4j.Logger; import org.slf4j.Logger;
@@ -48,11 +49,6 @@ public class DataNodePeerMetrics {
private final String name; private final String name;
/**
* Threshold in milliseconds below which a DataNode is definitely not slow.
*/
private static final long LOW_THRESHOLD_MS = 5;
private static final long MIN_OUTLIER_DETECTION_NODES = 10;
private final OutlierDetector slowNodeDetector; private final OutlierDetector slowNodeDetector;
@@ -62,14 +58,28 @@ public class DataNodePeerMetrics {
* outlier detection is skipped. * outlier detection is skipped.
*/ */
private final long minOutlierDetectionSamples; private final long minOutlierDetectionSamples;
/**
* Threshold in milliseconds below which a DataNode is definitely not slow.
*/
private final long lowThresholdMs;
/**
* Minimum number of nodes to run outlier detection.
*/
private final long minOutlierDetectionNodes;
public DataNodePeerMetrics(final String name, Configuration conf) { public DataNodePeerMetrics(final String name, Configuration conf) {
this.name = name; this.name = name;
minOutlierDetectionSamples = conf.getLong( minOutlierDetectionSamples = conf.getLong(
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY, DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY,
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT); DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT);
this.slowNodeDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_NODES, lowThresholdMs =
LOW_THRESHOLD_MS); conf.getLong(DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY,
DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT);
minOutlierDetectionNodes =
conf.getLong(DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY,
DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT);
this.slowNodeDetector =
new OutlierDetector(minOutlierDetectionNodes, lowThresholdMs);
sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time"); sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time");
} }

View File

@@ -2330,6 +2330,22 @@
</description> </description>
</property> </property>
<property>
<name>dfs.datanode.min.outlier.detection.nodes</name>
<value>10</value>
<description>
Minimum number of nodes to run outlier detection.
</description>
</property>
<property>
<name>dfs.datanode.slowpeer.low.threshold.ms</name>
<value>5</value>
<description>
Threshold in milliseconds below which a DataNode is definitely not slow.
</description>
</property>
<property> <property>
<name>dfs.datanode.outliers.report.interval</name> <name>dfs.datanode.outliers.report.interval</name>
<value>30m</value> <value>30m</value>