[YARN-10687] Add option to disable/enable free disk space checking and percentage checking for full and not-full disks. Contributed by Qi Zhu.
This commit is contained in:
parent
5db4c0bf70
commit
5f067cf0f3
@ -2027,6 +2027,8 @@ public static boolean isAclEnabled(Configuration conf) {
|
|||||||
* marked as offline. Values can range from 0.0 to 100.0. If the value is
|
* marked as offline. Values can range from 0.0 to 100.0. If the value is
|
||||||
* greater than or equal to 100, NM will check for full disk. This applies to
|
* greater than or equal to 100, NM will check for full disk. This applies to
|
||||||
* nm-local-dirs and nm-log-dirs.
|
* nm-local-dirs and nm-log-dirs.
|
||||||
|
*
|
||||||
|
* This applies when disk-utilization-threshold.enabled is true.
|
||||||
*/
|
*/
|
||||||
public static final String NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
|
public static final String NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
|
||||||
NM_DISK_HEALTH_CHECK_PREFIX + "max-disk-utilization-per-disk-percentage";
|
NM_DISK_HEALTH_CHECK_PREFIX + "max-disk-utilization-per-disk-percentage";
|
||||||
@ -2036,6 +2038,17 @@ public static boolean isAclEnabled(Configuration conf) {
|
|||||||
public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
|
public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
|
||||||
90.0F;
|
90.0F;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enable/Disable the disk utilization percentage
|
||||||
|
* threshold for disk health checker.
|
||||||
|
*/
|
||||||
|
public static final String NM_DISK_UTILIZATION_THRESHOLD_ENABLED =
|
||||||
|
NM_DISK_HEALTH_CHECK_PREFIX +
|
||||||
|
"disk-utilization-threshold.enabled";
|
||||||
|
|
||||||
|
public static final
|
||||||
|
boolean DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The low threshold percentage of disk space used when an offline disk is
|
* The low threshold percentage of disk space used when an offline disk is
|
||||||
* marked as online. Values can range from 0.0 to 100.0. The value shouldn't
|
* marked as online. Values can range from 0.0 to 100.0. The value shouldn't
|
||||||
@ -2051,9 +2064,23 @@ public static boolean isAclEnabled(Configuration conf) {
|
|||||||
/**
|
/**
|
||||||
* The minimum space that must be available on a local dir for it to be used.
|
* The minimum space that must be available on a local dir for it to be used.
|
||||||
* This applies to nm-local-dirs and nm-log-dirs.
|
* This applies to nm-local-dirs and nm-log-dirs.
|
||||||
|
*
|
||||||
|
* This applies when disk-free-space-threshold.enabled is true.
|
||||||
*/
|
*/
|
||||||
public static final String NM_MIN_PER_DISK_FREE_SPACE_MB =
|
public static final String NM_MIN_PER_DISK_FREE_SPACE_MB =
|
||||||
NM_DISK_HEALTH_CHECK_PREFIX + "min-free-space-per-disk-mb";
|
NM_DISK_HEALTH_CHECK_PREFIX + "min-free-space-per-disk-mb";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enable/Disable the minimum disk free
|
||||||
|
* space threshold for disk health checker.
|
||||||
|
*/
|
||||||
|
public static final String NM_DISK_FREE_SPACE_THRESHOLD_ENABLED =
|
||||||
|
NM_DISK_HEALTH_CHECK_PREFIX +
|
||||||
|
"disk-free-space-threshold.enabled";
|
||||||
|
|
||||||
|
public static final boolean
|
||||||
|
DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The minimum space that must be available on an offline
|
* The minimum space that must be available on an offline
|
||||||
* disk for it to be marked as online. The value should not be less
|
* disk for it to be marked as online. The value should not be less
|
||||||
|
@ -1809,12 +1809,27 @@
|
|||||||
<value>0.25</value>
|
<value>0.25</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Enable/Disable the disk utilization percentage
|
||||||
|
threshold for disk health checker.</description>
|
||||||
|
<name>yarn.nodemanager.disk-health-checker.disk-utilization-threshold.enabled</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Enable/Disable the minimum disk free
|
||||||
|
space threshold for disk health checker.</description>
|
||||||
|
<name>yarn.nodemanager.disk-health-checker.disk-free-space-threshold.enabled</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>The maximum percentage of disk space utilization allowed after
|
<description>The maximum percentage of disk space utilization allowed after
|
||||||
which a disk is marked as bad. Values can range from 0.0 to 100.0.
|
which a disk is marked as bad. Values can range from 0.0 to 100.0.
|
||||||
If the value is greater than or equal to 100, the nodemanager will check
|
If the value is greater than or equal to 100, the nodemanager will check
|
||||||
for full disk. This applies to yarn.nodemanager.local-dirs and
|
for full disk. This applies to yarn.nodemanager.local-dirs and
|
||||||
yarn.nodemanager.log-dirs.</description>
|
yarn.nodemanager.log-dirs when
|
||||||
|
yarn.nodemanager.disk-health-checker.disk-utilization-threshold.enabled is true.</description>
|
||||||
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
|
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
|
||||||
<value>90.0</value>
|
<value>90.0</value>
|
||||||
</property>
|
</property>
|
||||||
@ -1834,7 +1849,8 @@
|
|||||||
<description>The minimum space in megabytes that must be available on a disk for
|
<description>The minimum space in megabytes that must be available on a disk for
|
||||||
it to be used. If space on a disk falls below this threshold, it will be marked
|
it to be used. If space on a disk falls below this threshold, it will be marked
|
||||||
as bad. This applies to yarn.nodemanager.local-dirs and
|
as bad. This applies to yarn.nodemanager.local-dirs and
|
||||||
yarn.nodemanager.log-dirs.</description>
|
yarn.nodemanager.log-dirs when
|
||||||
|
yarn.nodemanager.disk-health-checker.disk-free-space-threshold.enabled is true.</description>
|
||||||
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
|
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
|
||||||
<value>0</value>
|
<value>0</value>
|
||||||
</property>
|
</property>
|
||||||
|
@ -59,6 +59,9 @@ public class DirectoryCollection {
|
|||||||
|
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
private final DiskValidator diskValidator;
|
private final DiskValidator diskValidator;
|
||||||
|
|
||||||
|
private boolean diskUtilizationThresholdEnabled;
|
||||||
|
private boolean diskFreeSpaceThresholdEnabled;
|
||||||
/**
|
/**
|
||||||
* The enum defines disk failure type.
|
* The enum defines disk failure type.
|
||||||
*/
|
*/
|
||||||
@ -239,6 +242,17 @@ public DirectoryCollection(String[] dirs,
|
|||||||
throw new YarnRuntimeException(e);
|
throw new YarnRuntimeException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
diskUtilizationThresholdEnabled = conf.
|
||||||
|
getBoolean(YarnConfiguration.
|
||||||
|
NM_DISK_UTILIZATION_THRESHOLD_ENABLED,
|
||||||
|
YarnConfiguration.
|
||||||
|
DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED);
|
||||||
|
diskFreeSpaceThresholdEnabled = conf.
|
||||||
|
getBoolean(YarnConfiguration.
|
||||||
|
NM_DISK_FREE_SPACE_THRESHOLD_ENABLED,
|
||||||
|
YarnConfiguration.
|
||||||
|
DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED);
|
||||||
|
|
||||||
localDirs = new ArrayList<>(Arrays.asList(dirs));
|
localDirs = new ArrayList<>(Arrays.asList(dirs));
|
||||||
errorDirs = new ArrayList<>();
|
errorDirs = new ArrayList<>();
|
||||||
fullDirs = new ArrayList<>();
|
fullDirs = new ArrayList<>();
|
||||||
@ -520,7 +534,9 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs,
|
|||||||
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
|
diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
|
||||||
long diskFreeSpaceCutoff = goodDirs.contains(dir) ?
|
long diskFreeSpaceCutoff = goodDirs.contains(dir) ?
|
||||||
diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh;
|
diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh;
|
||||||
if (isDiskUsageOverPercentageLimit(testDir,
|
|
||||||
|
if (diskUtilizationThresholdEnabled
|
||||||
|
&& isDiskUsageOverPercentageLimit(testDir,
|
||||||
diskUtilizationPercentageCutoff)) {
|
diskUtilizationPercentageCutoff)) {
|
||||||
msg =
|
msg =
|
||||||
"used space above threshold of "
|
"used space above threshold of "
|
||||||
@ -529,7 +545,8 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs,
|
|||||||
ret.put(dir,
|
ret.put(dir,
|
||||||
new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
|
new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
|
||||||
continue;
|
continue;
|
||||||
} else if (isDiskFreeSpaceUnderLimit(testDir, diskFreeSpaceCutoff)) {
|
} else if (diskFreeSpaceThresholdEnabled
|
||||||
|
&& isDiskFreeSpaceUnderLimit(testDir, diskFreeSpaceCutoff)) {
|
||||||
msg =
|
msg =
|
||||||
"free space below limit of " + diskFreeSpaceCutoff
|
"free space below limit of " + diskFreeSpaceCutoff
|
||||||
+ "MB";
|
+ "MB";
|
||||||
@ -613,6 +630,28 @@ long getDiskUtilizationSpaceCutoffHigh() {
|
|||||||
return diskFreeSpaceCutoffHigh;
|
return diskFreeSpaceCutoffHigh;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
boolean getDiskUtilizationThresholdEnabled() {
|
||||||
|
return diskUtilizationThresholdEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
boolean getDiskFreeSpaceThresholdEnabled() {
|
||||||
|
return diskFreeSpaceThresholdEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void setDiskUtilizationThresholdEnabled(boolean
|
||||||
|
utilizationEnabled) {
|
||||||
|
diskUtilizationThresholdEnabled = utilizationEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void setDiskFreeSpaceThresholdEnabled(boolean
|
||||||
|
freeSpaceEnabled) {
|
||||||
|
diskFreeSpaceThresholdEnabled = freeSpaceEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
public void setDiskUtilizationSpaceCutoff(long freeSpaceCutoff) {
|
public void setDiskUtilizationSpaceCutoff(long freeSpaceCutoff) {
|
||||||
setDiskUtilizationSpaceCutoff(freeSpaceCutoff,
|
setDiskUtilizationSpaceCutoff(freeSpaceCutoff,
|
||||||
freeSpaceCutoff);
|
freeSpaceCutoff);
|
||||||
|
@ -176,6 +176,105 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
|
|||||||
dc.getGoodDirsDiskUtilizationPercentage());
|
dc.getGoodDirsDiskUtilizationPercentage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDiskSpaceUtilizationThresholdEnabled() throws IOException {
|
||||||
|
|
||||||
|
String dirA = new File(testDir, "dirA").getPath();
|
||||||
|
String[] dirs = {dirA};
|
||||||
|
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F);
|
||||||
|
|
||||||
|
// Disable disk utilization threshold.
|
||||||
|
dc.setDiskUtilizationThresholdEnabled(false);
|
||||||
|
Assert.assertFalse(dc.getDiskUtilizationThresholdEnabled());
|
||||||
|
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||||
|
|
||||||
|
// Enable disk utilization threshold.
|
||||||
|
dc.setDiskUtilizationThresholdEnabled(true);
|
||||||
|
Assert.assertTrue(dc.getDiskUtilizationThresholdEnabled());
|
||||||
|
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||||
|
Assert.assertNotNull(dc.getDirectoryErrorInfo(dirA));
|
||||||
|
Assert.assertEquals(DirectoryCollection.DiskErrorCause.DISK_FULL,
|
||||||
|
dc.getDirectoryErrorInfo(dirA).cause);
|
||||||
|
|
||||||
|
// no good dirs
|
||||||
|
Assert.assertEquals(0,
|
||||||
|
dc.getGoodDirsDiskUtilizationPercentage());
|
||||||
|
|
||||||
|
dc = new DirectoryCollection(dirs, 100.0F);
|
||||||
|
int utilizedSpacePerc =
|
||||||
|
(int) ((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
|
||||||
|
testDir.getTotalSpace());
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||||
|
|
||||||
|
Assert.assertEquals(utilizedSpacePerc,
|
||||||
|
dc.getGoodDirsDiskUtilizationPercentage());
|
||||||
|
|
||||||
|
dc = new DirectoryCollection(dirs,
|
||||||
|
testDir.getTotalSpace() / (1024 * 1024));
|
||||||
|
|
||||||
|
// Disable disk utilization threshold.
|
||||||
|
dc.setDiskUtilizationThresholdEnabled(false);
|
||||||
|
Assert.assertFalse(dc.getDiskUtilizationThresholdEnabled());
|
||||||
|
|
||||||
|
// Disable disk free space threshold.
|
||||||
|
dc.setDiskFreeSpaceThresholdEnabled(false);
|
||||||
|
Assert.assertFalse(dc.getDiskFreeSpaceThresholdEnabled());
|
||||||
|
dc.checkDirs();
|
||||||
|
|
||||||
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||||
|
|
||||||
|
dc = new DirectoryCollection(dirs,
|
||||||
|
testDir.getTotalSpace() / (1024 * 1024));
|
||||||
|
|
||||||
|
// Enable disk free space threshold.
|
||||||
|
dc.setDiskFreeSpaceThresholdEnabled(true);
|
||||||
|
Assert.assertTrue(dc.getDiskFreeSpaceThresholdEnabled());
|
||||||
|
|
||||||
|
dc.checkDirs();
|
||||||
|
|
||||||
|
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||||
|
Assert.assertNotNull(dc.getDirectoryErrorInfo(dirA));
|
||||||
|
// no good dirs
|
||||||
|
Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
|
||||||
|
|
||||||
|
dc = new DirectoryCollection(dirs, 100.0F, 100.0F, 0);
|
||||||
|
utilizedSpacePerc =
|
||||||
|
(int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
|
||||||
|
testDir.getTotalSpace());
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getErroredDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
Assert.assertNull(dc.getDirectoryErrorInfo(dirA));
|
||||||
|
|
||||||
|
Assert.assertEquals(utilizedSpacePerc,
|
||||||
|
dc.getGoodDirsDiskUtilizationPercentage());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDiskLimitsCutoffSetters() throws IOException {
|
public void testDiskLimitsCutoffSetters() throws IOException {
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user