HDFS-15745. Make DataNodePeerMetrics#LOW_THRESHOLD_MS and MIN_OUTLIER_DETECTION_NODES configurable. Contributed by Haibin Huang. (#3992)
(cherry picked from commit 1cd96e8dd8)
Conflicts:
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
Reviewed-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
parent
352656999f
commit
4c57fb4d6b
@ -650,6 +650,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
||||
public static final long
|
||||
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT =
|
||||
1000;
|
||||
public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY =
|
||||
"dfs.datanode.min.outlier.detection.nodes";
|
||||
public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT =
|
||||
10L;
|
||||
public static final String DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY =
|
||||
"dfs.datanode.slowpeer.low.threshold.ms";
|
||||
public static final long DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT =
|
||||
5L;
|
||||
public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY =
|
||||
"dfs.datanode.min.outlier.detection.disks";
|
||||
public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT =
|
||||
|
@ -22,6 +22,7 @@
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||
import org.apache.hadoop.metrics2.MetricsJsonBuilder;
|
||||
import org.apache.hadoop.metrics2.lib.MutableRollingAverages;
|
||||
import org.slf4j.Logger;
|
||||
@ -48,11 +49,6 @@ public class DataNodePeerMetrics {
|
||||
|
||||
private final String name;
|
||||
|
||||
/**
|
||||
* Threshold in milliseconds below which a DataNode is definitely not slow.
|
||||
*/
|
||||
private static final long LOW_THRESHOLD_MS = 5;
|
||||
private static final long MIN_OUTLIER_DETECTION_NODES = 10;
|
||||
|
||||
private final OutlierDetector slowNodeDetector;
|
||||
|
||||
@ -62,14 +58,28 @@ public class DataNodePeerMetrics {
|
||||
* outlier detection is skipped.
|
||||
*/
|
||||
private final long minOutlierDetectionSamples;
|
||||
/**
|
||||
* Threshold in milliseconds below which a DataNode is definitely not slow.
|
||||
*/
|
||||
private final long lowThresholdMs;
|
||||
/**
|
||||
* Minimum number of nodes to run outlier detection.
|
||||
*/
|
||||
private final long minOutlierDetectionNodes;
|
||||
|
||||
public DataNodePeerMetrics(final String name, Configuration conf) {
|
||||
this.name = name;
|
||||
minOutlierDetectionSamples = conf.getLong(
|
||||
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY,
|
||||
DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT);
|
||||
this.slowNodeDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_NODES,
|
||||
LOW_THRESHOLD_MS);
|
||||
lowThresholdMs =
|
||||
conf.getLong(DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY,
|
||||
DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT);
|
||||
minOutlierDetectionNodes =
|
||||
conf.getLong(DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY,
|
||||
DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT);
|
||||
this.slowNodeDetector =
|
||||
new OutlierDetector(minOutlierDetectionNodes, lowThresholdMs);
|
||||
sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time");
|
||||
}
|
||||
|
||||
|
@ -2330,6 +2330,22 @@
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.min.outlier.detection.nodes</name>
|
||||
<value>10</value>
|
||||
<description>
|
||||
Minimum number of nodes to run outlier detection.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.slowpeer.low.threshold.ms</name>
|
||||
<value>5</value>
|
||||
<description>
|
||||
Threshold in milliseconds below which a DataNode is definitely not slow.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.datanode.outliers.report.interval</name>
|
||||
<value>30m</value>
|
||||
|
Loading…
Reference in New Issue
Block a user