[YARN-10687] Add option to disable/enable free disk space checking and percentage checking for full and not-full disks. Contributed by Qi Zhu.
This commit is contained in:
parent
5db4c0bf70
commit
5f067cf0f3
@ -2027,6 +2027,8 @@ public static boolean isAclEnabled(Configuration conf) {
|
||||
* marked as offline. Values can range from 0.0 to 100.0. If the value is
|
||||
* greater than or equal to 100, NM will check for full disk. This applies to
|
||||
* nm-local-dirs and nm-log-dirs.
|
||||
*
|
||||
* This applies when disk-utilization-threshold.enabled is true.
|
||||
*/
|
||||
public static final String NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
|
||||
NM_DISK_HEALTH_CHECK_PREFIX + "max-disk-utilization-per-disk-percentage";
|
||||
@ -2036,6 +2038,17 @@ public static boolean isAclEnabled(Configuration conf) {
|
||||
public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
|
||||
90.0F;
|
||||
|
||||
/**
|
||||
* Enable/Disable the disk utilization percentage
|
||||
* threshold for disk health checker.
|
||||
*/
|
||||
public static final String NM_DISK_UTILIZATION_THRESHOLD_ENABLED =
|
||||
NM_DISK_HEALTH_CHECK_PREFIX +
|
||||
"disk-utilization-threshold.enabled";
|
||||
|
||||
public static final
|
||||
boolean DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED = true;
|
||||
|
||||
/**
|
||||
* The low threshold percentage of disk space used when an offline disk is
|
||||
* marked as online. Values can range from 0.0 to 100.0. The value shouldn't
|
||||
@ -2051,9 +2064,23 @@ public static boolean isAclEnabled(Configuration conf) {
|
||||
/**
|
||||
* The minimum space that must be available on a local dir for it to be used.
|
||||
* This applies to nm-local-dirs and nm-log-dirs.
|
||||
*
|
||||
* This applies when disk-free-space-threshold.enabled is true.
|
||||
*/
|
||||
public static final String NM_MIN_PER_DISK_FREE_SPACE_MB =
|
||||
NM_DISK_HEALTH_CHECK_PREFIX + "min-free-space-per-disk-mb";
|
||||
|
||||
/**
|
||||
* Enable/Disable the minimum disk free
|
||||
* space threshold for disk health checker.
|
||||
*/
|
||||
public static final String NM_DISK_FREE_SPACE_THRESHOLD_ENABLED =
|
||||
NM_DISK_HEALTH_CHECK_PREFIX +
|
||||
"disk-free-space-threshold.enabled";
|
||||
|
||||
public static final boolean
|
||||
DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED = true;
|
||||
|
||||
/**
|
||||
* The minimum space that must be available on an offline
|
||||
* disk for it to be marked as online. The value should not be less
|
||||
|
@ -1809,12 +1809,27 @@
|
||||
<value>0.25</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Enable/Disable the disk utilization percentage
|
||||
threshold for disk health checker.</description>
|
||||
<name>yarn.nodemanager.disk-health-checker.disk-utilization-threshold.enabled</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Enable/Disable the minimum disk free
|
||||
space threshold for disk health checker.</description>
|
||||
<name>yarn.nodemanager.disk-health-checker.disk-free-space-threshold.enabled</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>The maximum percentage of disk space utilization allowed after
|
||||
which a disk is marked as bad. Values can range from 0.0 to 100.0.
|
||||
If the value is greater than or equal to 100, the nodemanager will check
|
||||
for full disk. This applies to yarn.nodemanager.local-dirs and
|
||||
yarn.nodemanager.log-dirs.</description>
|
||||
yarn.nodemanager.log-dirs when
|
||||
yarn.nodemanager.disk-health-checker.disk-utilization-threshold.enabled is true.</description>
|
||||
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
|
||||
<value>90.0</value>
|
||||
</property>
|
||||
@ -1834,7 +1849,8 @@
|
||||
<description>The minimum space in megabytes that must be available on a disk for
|
||||
it to be used. If space on a disk falls below this threshold, it will be marked
|
||||
as bad. This applies to yarn.nodemanager.local-dirs and
|
||||
yarn.nodemanager.log-dirs.</description>
|
||||
yarn.nodemanager.log-dirs when
|
||||
yarn.nodemanager.disk-health-checker.disk-free-space-threshold.enabled is true.</description>
|
||||
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
|
||||
<value>0</value>
|
||||
</property>
|
||||
|
@ -59,6 +59,9 @@ public class DirectoryCollection {
|
||||
|
||||
private final Configuration conf;
|
||||
private final DiskValidator diskValidator;
|
||||
|
||||
private boolean diskUtilizationThresholdEnabled;
|
||||
private boolean diskFreeSpaceThresholdEnabled;
|
||||
/**
|
||||
* The enum defines disk failure type.
|
||||
*/
|
||||
@ -239,6 +242,17 @@ public DirectoryCollection(String[] dirs,
|
||||
throw new YarnRuntimeException(e);
|
||||
}
|
||||
|
||||
diskUtilizationThresholdEnabled = conf.
|
||||
getBoolean(YarnConfiguration.
|
||||
NM_DISK_UTILIZATION_THRESHOLD_ENABLED,
|
||||
YarnConfiguration.
|
||||
DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED);
|
||||
diskFreeSpaceThresholdEnabled = conf.
|
||||
getBoolean(YarnConfiguration.
|
||||
NM_DISK_FREE_SPACE_THRESHOLD_ENABLED,
|
||||
YarnConfiguration.
|
||||
DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED);
|
||||
|
||||
localDirs = new ArrayList<>(Arrays.asList(dirs));
|
||||
errorDirs = new ArrayList<>();
|
||||
fullDirs = new ArrayList<>();
|
||||
@ -520,7 +534,9 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs,
|
||||
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
|
||||
long diskFreeSpaceCutoff = goodDirs.contains(dir) ?
|
||||
diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh;
|
||||
if (isDiskUsageOverPercentageLimit(testDir,
|
||||
|
||||
if (diskUtilizationThresholdEnabled
|
||||
&& isDiskUsageOverPercentageLimit(testDir,
|
||||
diskUtilizationPercentageCutoff)) {
|
||||
msg =
|
||||
"used space above threshold of "
|
||||
@ -529,7 +545,8 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs,
|
||||
ret.put(dir,
|
||||
new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
|
||||
continue;
|
||||
} else if (isDiskFreeSpaceUnderLimit(testDir, diskFreeSpaceCutoff)) {
|
||||
} else if (diskFreeSpaceThresholdEnabled
|
||||
&& isDiskFreeSpaceUnderLimit(testDir, diskFreeSpaceCutoff)) {
|
||||
msg =
|
||||
"free space below limit of " + diskFreeSpaceCutoff
|
||||
+ "MB";
|
||||
@ -613,6 +630,28 @@ long getDiskUtilizationSpaceCutoffHigh() {
|
||||
return diskFreeSpaceCutoffHigh;
|
||||
}
|
||||
|
||||
/**
 * Returns the flag that gates the disk utilization percentage check
 * performed in testDirs.
 *
 * @return the current value of diskUtilizationThresholdEnabled
 */
@VisibleForTesting
|
||||
boolean getDiskUtilizationThresholdEnabled() {
|
||||
return diskUtilizationThresholdEnabled;
|
||||
}
|
||||
|
||||
/**
 * Returns the flag that gates the minimum free space check performed in
 * testDirs.
 *
 * @return the current value of diskFreeSpaceThresholdEnabled
 */
@VisibleForTesting
|
||||
boolean getDiskFreeSpaceThresholdEnabled() {
|
||||
return diskFreeSpaceThresholdEnabled;
|
||||
}
|
||||
|
||||
/**
 * Overrides the flag that gates the disk utilization percentage check,
 * replacing the value read from the configuration in the constructor.
 *
 * @param utilizationEnabled new value for diskUtilizationThresholdEnabled
 */
@VisibleForTesting
|
||||
void setDiskUtilizationThresholdEnabled(boolean
|
||||
utilizationEnabled) {
|
||||
diskUtilizationThresholdEnabled = utilizationEnabled;
|
||||
}
|
||||
|
||||
/**
 * Overrides the flag that gates the minimum free space check, replacing
 * the value read from the configuration in the constructor.
 *
 * @param freeSpaceEnabled new value for diskFreeSpaceThresholdEnabled
 */
@VisibleForTesting
|
||||
void setDiskFreeSpaceThresholdEnabled(boolean
|
||||
freeSpaceEnabled) {
|
||||
diskFreeSpaceThresholdEnabled = freeSpaceEnabled;
|
||||
}
|
||||
|
||||
public void setDiskUtilizationSpaceCutoff(long freeSpaceCutoff) {
|
||||
setDiskUtilizationSpaceCutoff(freeSpaceCutoff,
|
||||
freeSpaceCutoff);
|
||||
|
@ -176,6 +176,105 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
|
||||
dc.getGoodDirsDiskUtilizationPercentage());
|
||||
}
|
||||
|
||||
/**
 * Checks that the disk utilization and free space threshold flags control
 * the outcome of checkDirs(): with a flag disabled the single test
 * directory is expected to stay good, and with it enabled (and a cutoff the
 * directory cannot satisfy) the directory is expected to be reported as
 * failed and full.
 */
@Test
|
||||
public void testDiskSpaceUtilizationThresholdEnabled() throws IOException {
|
||||
|
||||
String dirA = new File(testDir, "dirA").getPath();
|
||||
String[] dirs = {dirA};
|
||||
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F);
|
||||
|
||||
// Disable disk utilization threshold.
|
||||
dc.setDiskUtilizationThresholdEnabled(false);
|
||||
Assert.assertFalse(dc.getDiskUtilizationThresholdEnabled());
|
||||
|
||||
dc.checkDirs();
|
||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||
|
||||
// Enable disk utilization threshold.
|
||||
dc.setDiskUtilizationThresholdEnabled(true);
|
||||
Assert.assertTrue(dc.getDiskUtilizationThresholdEnabled());
|
||||
|
||||
dc.checkDirs();
|
||||
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||
Assert.assertNotNull(dc.getDirectoryErrorInfo(dirA));
|
||||
Assert.assertEquals(DirectoryCollection.DiskErrorCause.DISK_FULL,
|
||||
dc.getDirectoryErrorInfo(dirA).cause);
|
||||
|
||||
// no good dirs
|
||||
Assert.assertEquals(0,
|
||||
dc.getGoodDirsDiskUtilizationPercentage());
|
||||
|
||||
// New collection built with 100.0F: the directory is expected to be good.
dc = new DirectoryCollection(dirs, 100.0F);
|
||||
int utilizedSpacePerc =
|
||||
(int) ((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
|
||||
testDir.getTotalSpace());
|
||||
dc.checkDirs();
|
||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||
|
||||
Assert.assertEquals(utilizedSpacePerc,
|
||||
dc.getGoodDirsDiskUtilizationPercentage());
|
||||
|
||||
// Cutoff equal to the total size of the test volume in MB.
// NOTE(review): presumably this constructor argument is the minimum free
// space cutoff - confirm against the DirectoryCollection constructor.
dc = new DirectoryCollection(dirs,
|
||||
testDir.getTotalSpace() / (1024 * 1024));
|
||||
|
||||
// Disable disk utilization threshold.
|
||||
dc.setDiskUtilizationThresholdEnabled(false);
|
||||
Assert.assertFalse(dc.getDiskUtilizationThresholdEnabled());
|
||||
|
||||
// Disable disk free space threshold.
|
||||
dc.setDiskFreeSpaceThresholdEnabled(false);
|
||||
Assert.assertFalse(dc.getDiskFreeSpaceThresholdEnabled());
|
||||
dc.checkDirs();
|
||||
|
||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||
|
||||
dc = new DirectoryCollection(dirs,
|
||||
testDir.getTotalSpace() / (1024 * 1024));
|
||||
|
||||
// Enable disk free space threshold.
|
||||
dc.setDiskFreeSpaceThresholdEnabled(true);
|
||||
Assert.assertTrue(dc.getDiskFreeSpaceThresholdEnabled());
|
||||
|
||||
dc.checkDirs();
|
||||
|
||||
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||
Assert.assertNotNull(dc.getDirectoryErrorInfo(dirA));
|
||||
// no good dirs
|
||||
Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
|
||||
|
||||
// New collection built with (100.0F, 100.0F, 0): the directory is expected
// to be good and to report the measured utilization percentage.
dc = new DirectoryCollection(dirs, 100.0F, 100.0F, 0);
|
||||
utilizedSpacePerc =
|
||||
(int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
|
||||
testDir.getTotalSpace());
|
||||
dc.checkDirs();
|
||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||
|
||||
Assert.assertEquals(utilizedSpacePerc,
|
||||
dc.getGoodDirsDiskUtilizationPercentage());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDiskLimitsCutoffSetters() throws IOException {
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user