HDFS-15854. Make some parameters configurable for SlowDiskTracker and SlowPeerTracker (#2718)

Authored-by: tomscut <litao@bigo.sg>
This commit is contained in:
litao 2021-03-01 23:52:59 +08:00 committed by GitHub
parent c3b3b36dee
commit 32353eb38a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 4 deletions

View File

@ -676,6 +676,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.datanode.slowpeer.low.threshold.ms"; "dfs.datanode.slowpeer.low.threshold.ms";
public static final long DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT = public static final long DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT =
5L; 5L;
public static final String DFS_DATANODE_MAX_NODES_TO_REPORT_KEY =
"dfs.datanode.max.nodes.to.report";
public static final int DFS_DATANODE_MAX_NODES_TO_REPORT_DEFAULT =
5;
public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY = public static final String DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY =
"dfs.datanode.min.outlier.detection.disks"; "dfs.datanode.min.outlier.detection.disks";
public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT = public static final long DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT =
@ -684,6 +688,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.datanode.slowdisk.low.threshold.ms"; "dfs.datanode.slowdisk.low.threshold.ms";
public static final long DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT = public static final long DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT =
20L; 20L;
public static final String DFS_DATANODE_MAX_DISKS_TO_REPORT_KEY =
"dfs.datanode.max.disks.to.report";
public static final int DFS_DATANODE_MAX_DISKS_TO_REPORT_DEFAULT =
5;
public static final String DFS_DATANODE_HOST_NAME_KEY = public static final String DFS_DATANODE_HOST_NAME_KEY =
HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY; HdfsClientConfigKeys.DeprecatedKeys.DFS_DATANODE_HOST_NAME_KEY;
public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY = public static final String DFS_NAMENODE_CHECKPOINT_DIR_KEY =

View File

@ -77,7 +77,7 @@ public class SlowDiskTracker {
* Number of disks to include in JSON report per operation. We will return * Number of disks to include in JSON report per operation. We will return
* disks with the highest latency. * disks with the highest latency.
*/ */
private static final int MAX_DISKS_TO_REPORT = 5; private final int maxDisksToReport;
private static final String DATANODE_DISK_SEPARATOR = ":"; private static final String DATANODE_DISK_SEPARATOR = ":";
private final long reportGenerationIntervalMs; private final long reportGenerationIntervalMs;
@ -107,6 +107,9 @@ public SlowDiskTracker(Configuration conf, Timer timer) {
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT, DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT,
TimeUnit.MILLISECONDS); TimeUnit.MILLISECONDS);
this.maxDisksToReport = conf.getInt(
DFSConfigKeys.DFS_DATANODE_MAX_DISKS_TO_REPORT_KEY,
DFSConfigKeys.DFS_DATANODE_MAX_DISKS_TO_REPORT_DEFAULT);
this.reportValidityMs = reportGenerationIntervalMs * 3; this.reportValidityMs = reportGenerationIntervalMs * 3;
} }
@ -153,7 +156,7 @@ public void updateSlowDiskReportAsync(long now) {
@Override @Override
public void run() { public void run() {
slowDisksReport = getSlowDisks(diskIDLatencyMap, slowDisksReport = getSlowDisks(diskIDLatencyMap,
MAX_DISKS_TO_REPORT, now); maxDisksToReport, now);
cleanUpOldReports(now); cleanUpOldReports(now);

View File

@ -79,7 +79,7 @@ public class SlowPeerTracker {
* Number of nodes to include in JSON report. We will return nodes with * Number of nodes to include in JSON report. We will return nodes with
* the highest number of votes from peers. * the highest number of votes from peers.
*/ */
private static final int MAX_NODES_TO_REPORT = 5; private final int maxNodesToReport;
/** /**
* Information about peers that have reported a node as being slow. * Information about peers that have reported a node as being slow.
@ -103,6 +103,9 @@ public SlowPeerTracker(Configuration conf, Timer timer) {
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT, DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_DEFAULT,
TimeUnit.MILLISECONDS) * 3; TimeUnit.MILLISECONDS) * 3;
this.maxNodesToReport = conf.getInt(
DFSConfigKeys.DFS_DATANODE_MAX_NODES_TO_REPORT_KEY,
DFSConfigKeys.DFS_DATANODE_MAX_NODES_TO_REPORT_DEFAULT);
} }
/** /**
@ -193,7 +196,7 @@ private SortedSet<String> filterNodeReports(
*/ */
public String getJson() { public String getJson() {
Collection<ReportForJson> validReports = getJsonReports( Collection<ReportForJson> validReports = getJsonReports(
MAX_NODES_TO_REPORT); maxNodesToReport);
try { try {
return WRITER.writeValueAsString(validReports); return WRITER.writeValueAsString(validReports);
} catch (JsonProcessingException e) { } catch (JsonProcessingException e) {

View File

@ -2346,6 +2346,15 @@
</description> </description>
</property> </property>
<property>
<name>dfs.datanode.max.nodes.to.report</name>
<value>5</value>
<description>
Maximum number of nodes to include in the slow peer JSON report. The
nodes with the highest number of votes from peers are returned.
</description>
</property>
<property> <property>
<name>dfs.datanode.outliers.report.interval</name> <name>dfs.datanode.outliers.report.interval</name>
<value>30m</value> <value>30m</value>
@ -2386,6 +2395,15 @@
</description> </description>
</property> </property>
<property>
<name>dfs.datanode.max.disks.to.report</name>
<value>5</value>
<description>
Maximum number of disks to include in the slow disk JSON report per
operation. The disks with the highest latency are returned.
</description>
</property>
<property> <property>
<name>hadoop.user.group.metrics.percentiles.intervals</name> <name>hadoop.user.group.metrics.percentiles.intervals</name>
<value></value> <value></value>