HDFS-16858. Dynamically adjust max slow disks to exclude. (#5180)

Reviewed-by: Chris Nauroth <cnauroth@apache.org>
Reviewed-by: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Signed-off-by: Tao Li <tomscut@apache.org>
This commit is contained in:
dingshun3016 2022-12-09 08:10:04 +08:00 committed by GitHub
parent ee7d1787cd
commit 2fa540dca1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 28 additions and 3 deletions

View File

@ -72,6 +72,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT;
@ -353,6 +355,7 @@ public class DataNode extends ReconfigurableBase
DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY, DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY,
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY, DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY,
DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY,
FS_DU_INTERVAL_KEY, FS_DU_INTERVAL_KEY,
FS_GETSPACEUSED_JITTER_KEY, FS_GETSPACEUSED_JITTER_KEY,
FS_GETSPACEUSED_CLASSNAME)); FS_GETSPACEUSED_CLASSNAME));
@ -699,6 +702,7 @@ public String reconfigurePropertyImpl(String property, String newVal)
case DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY: case DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY:
case DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY: case DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY:
case DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY: case DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY:
case DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY:
return reconfSlowDiskParameters(property, newVal); return reconfSlowDiskParameters(property, newVal);
case FS_DU_INTERVAL_KEY: case FS_DU_INTERVAL_KEY:
case FS_GETSPACEUSED_JITTER_KEY: case FS_GETSPACEUSED_JITTER_KEY:
@ -877,6 +881,12 @@ private String reconfSlowDiskParameters(String property, String newVal)
Long.parseLong(newVal)); Long.parseLong(newVal));
result = Long.toString(threshold); result = Long.toString(threshold);
diskMetrics.setLowThresholdMs(threshold); diskMetrics.setLowThresholdMs(threshold);
} else if (property.equals(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY)) {
checkNotNull(diskMetrics, "DataNode disk stats may be disabled.");
int maxSlowDisksToExclude = (newVal == null ?
DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT : Integer.parseInt(newVal));
result = Integer.toString(maxSlowDisksToExclude);
diskMetrics.setMaxSlowDisksToExclude(maxSlowDisksToExclude);
} }
LOG.info("RECONFIGURE* changed {} to {}", property, newVal); LOG.info("RECONFIGURE* changed {} to {}", property, newVal);
return result; return result;

View File

@ -80,7 +80,7 @@ public class DataNodeDiskMetrics {
/** /**
* The number of slow disks that needs to be excluded. * The number of slow disks that needs to be excluded.
*/ */
private int maxSlowDisksToExclude; private volatile int maxSlowDisksToExclude;
/** /**
* List of slow disks that need to be excluded. * List of slow disks that need to be excluded.
*/ */
@ -274,6 +274,14 @@ public List<String> getSlowDisksToExclude() {
return slowDisksToExclude; return slowDisksToExclude;
} }
/**
 * Returns the currently configured limit on the number of slow disks
 * to exclude.
 *
 * @return the present value of {@code maxSlowDisksToExclude}.
 */
public int getMaxSlowDisksToExclude() {
return this.maxSlowDisksToExclude;
}
/**
 * Replaces the limit on the number of slow disks to exclude.
 * The value is stored as given; this method performs no range check.
 *
 * @param maxSlowDisksToExclude the new limit to store.
 */
public void setMaxSlowDisksToExclude(final int maxSlowDisksToExclude) {
this.maxSlowDisksToExclude = maxSlowDisksToExclude;
}
public void setLowThresholdMs(long thresholdMs) { public void setLowThresholdMs(long thresholdMs) {
Preconditions.checkArgument(thresholdMs > 0, Preconditions.checkArgument(thresholdMs > 0,
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY + " should be larger than 0"); DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY + " should be larger than 0");

View File

@ -45,6 +45,7 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull; import static org.junit.Assert.assertNull;
@ -636,13 +637,15 @@ public void testSlowDiskParameters() throws ReconfigurationException, IOExceptio
String[] slowDisksParameters2 = { String[] slowDisksParameters2 = {
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY,
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY}; DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY,
DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY};
for (String parameter : slowDisksParameters2) { for (String parameter : slowDisksParameters2) {
dn.reconfigureProperty(parameter, "99"); dn.reconfigureProperty(parameter, "99");
} }
// Assert diskMetrics. // Assert diskMetrics.
assertEquals(99, dn.getDiskMetrics().getMinOutlierDetectionDisks()); assertEquals(99, dn.getDiskMetrics().getMinOutlierDetectionDisks());
assertEquals(99, dn.getDiskMetrics().getLowThresholdMs()); assertEquals(99, dn.getDiskMetrics().getLowThresholdMs());
assertEquals(99, dn.getDiskMetrics().getMaxSlowDisksToExclude());
// Assert dnConf. // Assert dnConf.
assertTrue(dn.getDnConf().diskStatsEnabled); assertTrue(dn.getDnConf().diskStatsEnabled);
// Assert profilingEventHook. // Assert profilingEventHook.
@ -673,12 +676,16 @@ public void testSlowDiskParameters() throws ReconfigurationException, IOExceptio
dn.reconfigureProperty(DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, "1"); dn.reconfigureProperty(DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, "1");
dn.reconfigureProperty(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, null); dn.reconfigureProperty(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, null);
dn.reconfigureProperty(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY, null); dn.reconfigureProperty(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY, null);
dn.reconfigureProperty(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY, null);
assertEquals(String.format("expect %s is not configured", assertEquals(String.format("expect %s is not configured",
DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY), null, DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY), null,
dn.getConf().get(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY)); dn.getConf().get(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY));
assertEquals(String.format("expect %s is not configured", assertEquals(String.format("expect %s is not configured",
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY), null, DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY), null,
dn.getConf().get(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY)); dn.getConf().get(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY));
assertEquals(String.format("expect %s is not configured",
DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY), null,
dn.getConf().get(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY));
assertEquals(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT, assertEquals(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT,
dn.getDiskMetrics().getSlowDiskDetector().getMinOutlierDetectionNodes()); dn.getDiskMetrics().getSlowDiskDetector().getMinOutlierDetectionNodes());
assertEquals(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT, assertEquals(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT,

View File

@ -345,7 +345,7 @@ public void testDataNodeGetReconfigurableProperties() throws IOException, Interr
final List<String> outs = Lists.newArrayList(); final List<String> outs = Lists.newArrayList();
final List<String> errs = Lists.newArrayList(); final List<String> errs = Lists.newArrayList();
getReconfigurableProperties("datanode", address, outs, errs); getReconfigurableProperties("datanode", address, outs, errs);
assertEquals(19, outs.size()); assertEquals(20, outs.size());
assertEquals(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, outs.get(1)); assertEquals(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, outs.get(1));
} }