diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index c4d6ec9cfa..b4801bc7d2 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -492,6 +492,9 @@ Release 2.8.0 - UNRELEASED
YARN-4215. RMNodeLabels Manager Need to verify and replace node labels for the
only modified Node Label Mappings in the request. (Naganarasimha G R via wangda)
+ YARN-3943. Use separate threshold configurations for disk-full detection
+ and disk-not-full detection. (Zhihai Xu via jlowe)
+
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 93b81a6d10..1e102e5370 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1059,6 +1059,18 @@ private static void addDeprecatedKeys() {
public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
90.0F;
+ /**
+ * The low threshold percentage of disk space used when an offline disk is
+ * marked as online. Values can range from 0.0 to 100.0. The value shouldn't
+ * be more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE. If its value is
+ * more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE or not set, it will be
+ * set to the same value as NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE.
+ * This applies to nm-local-dirs and nm-log-dirs.
+ */
+ public static final String NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE =
+ NM_DISK_HEALTH_CHECK_PREFIX +
+ "disk-utilization-watermark-low-per-disk-percentage";
+
/**
* The minimum space that must be available on a local dir for it to be used.
* This applies to nm-local-dirs and nm-log-dirs.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 29d26d1143..53793d2c47 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1317,6 +1317,17 @@
    <value>90.0</value>
  </property>

+  <property>
+    <description>The low threshold percentage of disk space used when a bad disk is
+    marked as good. Values can range from 0.0 to 100.0. This applies to
+    yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs.
+    Note that if its value is more than yarn.nodemanager.disk-health-checker.
+    max-disk-utilization-per-disk-percentage or not set, it will be set to the same value as
+    yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage.</description>
+    <name>yarn.nodemanager.disk-health-checker.disk-utilization-watermark-low-per-disk-percentage</name>
+    <value></value>
+  </property>
+
  <property>
    <description>The minimum space that must be available on a disk for
    it to be used. This applies to yarn.nodemanager.local-dirs and
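For reference, a minimal sketch (not part of the patch; the class name is hypothetical) of how the two thresholds resolve when read from a Configuration, with the low watermark falling back to the high watermark when it is unset:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class WatermarkLookupSketch {
      public static void main(String[] args) {
        Configuration conf = new YarnConfiguration();
        // Disk-full threshold: a good dir is removed once usage exceeds this.
        float high = conf.getFloat(
            YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
            YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE);
        // Disk-not-full threshold: defaults to the high watermark when unset.
        float low = conf.getFloat(
            YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE, high);
        System.out.println("disk-full: " + high + "%, disk-not-full: " + low + "%");
      }
    }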
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
index 32046c5fdf..a2bfd206c8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
@@ -39,6 +39,8 @@
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.DiskChecker;
+import com.google.common.annotations.VisibleForTesting;
+
/**
* Manages a list of local storage directories.
*/
@@ -88,8 +90,9 @@ static List<String> concat(List<String> l1, List<String> l2) {
private List<String> fullDirs;
private int numFailures;
-
- private float diskUtilizationPercentageCutoff;
+
+ private float diskUtilizationPercentageCutoffHigh;
+ private float diskUtilizationPercentageCutoffLow;
private long diskUtilizationSpaceCutoff;
private int goodDirsDiskUtilizationPercentage;
@@ -103,7 +106,7 @@ static List<String> concat(List<String> l1, List<String> l2) {
* directories to be monitored
*/
public DirectoryCollection(String[] dirs) {
- this(dirs, 100.0F, 0);
+ this(dirs, 100.0F, 100.0F, 0);
}
/**
@@ -119,7 +122,7 @@ public DirectoryCollection(String[] dirs) {
*
*/
public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
- this(dirs, utilizationPercentageCutOff, 0);
+ this(dirs, utilizationPercentageCutOff, utilizationPercentageCutOff, 0);
}
/**
@@ -134,7 +137,7 @@ public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
*
*/
public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
- this(dirs, 100.0F, utilizationSpaceCutOff);
+ this(dirs, 100.0F, 100.0F, utilizationSpaceCutOff);
}
/**
@@ -145,25 +148,29 @@ public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
*
* @param dirs
* directories to be monitored
- * @param utilizationPercentageCutOff
+ * @param utilizationPercentageCutOffHigh
* percentage of disk that can be used before the dir is taken out of
* the good dirs list
+ * @param utilizationPercentageCutOffLow
+ * percentage of disk that can be used when the dir is moved from
+ * the bad dirs list to the good dirs list
* @param utilizationSpaceCutOff
* minimum space, in MB, that must be available on the disk for the
* dir to be marked as good
*
*/
- public DirectoryCollection(String[] dirs,
- float utilizationPercentageCutOff,
+ public DirectoryCollection(String[] dirs,
+ float utilizationPercentageCutOffHigh,
+ float utilizationPercentageCutOffLow,
long utilizationSpaceCutOff) {
localDirs = new CopyOnWriteArrayList<String>(dirs);
errorDirs = new CopyOnWriteArrayList<String>();
fullDirs = new CopyOnWriteArrayList<String>();
- diskUtilizationPercentageCutoff =
- utilizationPercentageCutOff < 0.0F ? 0.0F
- : (utilizationPercentageCutOff > 100.0F ? 100.0F
- : utilizationPercentageCutOff);
+ diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
+ utilizationPercentageCutOffHigh));
+ diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
+ diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
diskUtilizationSpaceCutoff =
utilizationSpaceCutOff < 0 ? 0 : utilizationSpaceCutOff;
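A standalone sketch of the clamping rule applied in the constructor above (helper names are illustrative): the high cutoff is bounded to [0.0, 100.0] and the low cutoff to [0.0, clamped high].

    // Illustrative helpers mirroring the constructor's clamping; not patch code.
    static float clampHigh(float high) {
      return Math.max(0.0F, Math.min(100.0F, high));     // bound to [0, 100]
    }

    static float clampLow(float low, float clampedHigh) {
      return Math.max(0.0F, Math.min(clampedHigh, low)); // bound to [0, clamped high]
    }

    // Examples: clampHigh(157.5F) == 100.0F, clampHigh(-57.5F) == 0.0F,
    // clampLow(95.0F, 90.0F) == 90.0F, clampLow(-5.0F, 90.0F) == 0.0F.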
@@ -254,7 +261,8 @@ synchronized boolean checkDirs() {
List<String> allLocalDirs =
DirectoryCollection.concat(localDirs, failedDirs);
- Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs);
+ Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs,
+ preCheckGoodDirs);
localDirs.clear();
errorDirs.clear();
@@ -314,7 +322,8 @@ synchronized boolean checkDirs() {
return setChanged;
}
- Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
+ Map<String, DiskErrorInformation> testDirs(List<String> dirs,
+ Set<String> goodDirs) {
HashMap<String, DiskErrorInformation> ret =
new HashMap<String, DiskErrorInformation>();
for (final String dir : dirs) {
@@ -322,7 +331,10 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
try {
File testDir = new File(dir);
DiskChecker.checkDir(testDir);
- if (isDiskUsageOverPercentageLimit(testDir)) {
+ float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
+ diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
+ if (isDiskUsageOverPercentageLimit(testDir,
+ diskUtilizationPercentageCutoff)) {
msg =
"used space above threshold of "
+ diskUtilizationPercentageCutoff
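Choosing the cutoff by current membership in goodDirs gives the check hysteresis; a simplified sketch of the resulting decision (illustrative names, ignoring the separate free-space check):

    // Illustrative only: a currently-good dir stays good until usage exceeds the
    // high cutoff, while a currently-bad dir is only readmitted once usage drops
    // to the low cutoff or below.
    static boolean overLimit(float usedPercentage, boolean currentlyGood,
        float cutoffHigh, float cutoffLow) {
      float cutoff = currentlyGood ? cutoffHigh : cutoffLow;
      return usedPercentage > cutoff;
    }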
@@ -374,7 +386,8 @@ private void verifyDirUsingMkdir(File dir) throws IOException {
}
}
- private boolean isDiskUsageOverPercentageLimit(File dir) {
+ private boolean isDiskUsageOverPercentageLimit(File dir,
+ float diskUtilizationPercentageCutoff) {
float freePercentage =
100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
float usedPercentage = 100.0F - freePercentage;
@@ -402,17 +415,24 @@ private void createDir(FileContext localFs, Path dir, FsPermission perm)
}
}
}
-
- public float getDiskUtilizationPercentageCutoff() {
- return diskUtilizationPercentageCutoff;
+
+ @VisibleForTesting
+ float getDiskUtilizationPercentageCutoffHigh() {
+ return diskUtilizationPercentageCutoffHigh;
+ }
+
+ @VisibleForTesting
+ float getDiskUtilizationPercentageCutoffLow() {
+ return diskUtilizationPercentageCutoffLow;
}
public void setDiskUtilizationPercentageCutoff(
- float diskUtilizationPercentageCutoff) {
- this.diskUtilizationPercentageCutoff =
- diskUtilizationPercentageCutoff < 0.0F ? 0.0F
- : (diskUtilizationPercentageCutoff > 100.0F ? 100.0F
- : diskUtilizationPercentageCutoff);
+ float utilizationPercentageCutOffHigh,
+ float utilizationPercentageCutOffLow) {
+ diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
+ utilizationPercentageCutOffHigh));
+ diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
+ diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
}
public long getDiskUtilizationSpaceCutoff() {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
index 769044ade5..5cc4e19287 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
@@ -114,22 +114,40 @@ public class LocalDirsHandlerService extends AbstractService {
private final class MonitoringTimerTask extends TimerTask {
public MonitoringTimerTask(Configuration conf) throws YarnRuntimeException {
- float maxUsableSpacePercentagePerDisk =
+ float highUsableSpacePercentagePerDisk =
conf.getFloat(
YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE);
+ float lowUsableSpacePercentagePerDisk =
+ conf.getFloat(
+ YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE,
+ highUsableSpacePercentagePerDisk);
+ if (lowUsableSpacePercentagePerDisk > highUsableSpacePercentagePerDisk) {
+ LOG.warn("Using " + YarnConfiguration.
+ NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE + " as " +
+ YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
+ ", because " + YarnConfiguration.
+ NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
+ " is not configured properly.");
+ lowUsableSpacePercentagePerDisk = highUsableSpacePercentagePerDisk;
+ }
long minFreeSpacePerDiskMB =
conf.getLong(YarnConfiguration.NM_MIN_PER_DISK_FREE_SPACE_MB,
YarnConfiguration.DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB);
localDirs =
new DirectoryCollection(
- validatePaths(conf
- .getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
- maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
+ validatePaths(conf
+ .getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
+ highUsableSpacePercentagePerDisk,
+ lowUsableSpacePercentagePerDisk,
+ minFreeSpacePerDiskMB);
logDirs =
new DirectoryCollection(
- validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
- maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
+ validatePaths(conf
+ .getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
+ highUsableSpacePercentagePerDisk,
+ lowUsableSpacePercentagePerDisk,
+ minFreeSpacePerDiskMB);
String local = conf.get(YarnConfiguration.NM_LOCAL_DIRS);
conf.set(NM_GOOD_LOCAL_DIRS,
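Taken together, a hedged usage sketch (test-style, same package as DirectoryCollection; the paths and values are arbitrary) of how the two thresholds behave once wired in:

    // With a 90% disk-full threshold and a 75% disk-not-full threshold, a dir is
    // dropped from the good list once usage goes above 90% and is only added back
    // after usage falls to 75% or below.
    String[] dirPaths = { "/grid/0/yarn/local", "/grid/1/yarn/local" };  // hypothetical
    DirectoryCollection collection =
        new DirectoryCollection(dirPaths, 90.0F, 75.0F, 0);
    collection.checkDirs();  // re-evaluates each dir against the appropriate watermark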
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
index 2fd89c601d..e529628b71 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
@@ -152,7 +152,7 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
// no good dirs
Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
- dc = new DirectoryCollection(dirs, 100.0F, 0);
+ dc = new DirectoryCollection(dirs, 100.0F, 100.0F, 0);
utilizedSpacePerc =
(int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
testDir.getTotalSpace());
@@ -168,18 +168,28 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
public void testDiskLimitsCutoffSetters() throws IOException {
String[] dirs = { "dir" };
- DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
+ DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 0.0F, 100);
float testValue = 57.5F;
float delta = 0.1F;
- dc.setDiskUtilizationPercentageCutoff(testValue);
- Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoff(),
- delta);
+ dc.setDiskUtilizationPercentageCutoff(testValue, 50.0F);
+ Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(50.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
+
testValue = -57.5F;
- dc.setDiskUtilizationPercentageCutoff(testValue);
- Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
+ Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
+
testValue = 157.5F;
- dc.setDiskUtilizationPercentageCutoff(testValue);
- Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
long spaceValue = 57;
dc.setDiskUtilizationSpaceCutoff(spaceValue);
@@ -200,7 +210,7 @@ public void testFailedDisksBecomingGoodAgain() throws Exception {
Assert.assertEquals(1, dc.getFailedDirs().size());
Assert.assertEquals(1, dc.getFullDirs().size());
- dc.setDiskUtilizationPercentageCutoff(100.0F);
+ dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
dc.checkDirs();
Assert.assertEquals(1, dc.getGoodDirs().size());
Assert.assertEquals(0, dc.getFailedDirs().size());
@@ -236,27 +246,45 @@ public void testConstructors() {
String[] dirs = { "dir" };
float delta = 0.1F;
DirectoryCollection dc = new DirectoryCollection(dirs);
- Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
dc = new DirectoryCollection(dirs, 57.5F);
- Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
dc = new DirectoryCollection(dirs, 57);
- Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoff());
- dc = new DirectoryCollection(dirs, 57.5F, 67);
- Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ dc = new DirectoryCollection(dirs, 57.5F, 50.5F, 67);
+ Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(50.5F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoff());
- dc = new DirectoryCollection(dirs, -57.5F, -67);
- Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ dc = new DirectoryCollection(dirs, -57.5F, -57.5F, -67);
+ Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
- dc = new DirectoryCollection(dirs, 157.5F, -67);
- Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+ dc = new DirectoryCollection(dirs, 157.5F, 157.5F, -67);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+ delta);
+ Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+ delta);
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
}
@@ -288,7 +316,7 @@ public void testDirsChangeListener() {
Assert.assertEquals(listener3.num, 1);
dc.deregisterDirsChangeListener(listener2);
- dc.setDiskUtilizationPercentageCutoff(100.0F);
+ dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
dc.checkDirs();
Assert.assertEquals(1, dc.getGoodDirs().size());
Assert.assertEquals(listener1.num, 3);