diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index c4d6ec9cfa..b4801bc7d2 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -492,6 +492,9 @@ Release 2.8.0 - UNRELEASED
     YARN-4215. RMNodeLabels Manager Need to verify and replace node labels for
     the only modified Node Label Mappings in the request. (Naganarasimha G R via wangda)
 
+    YARN-3943. Use separate threshold configurations for disk-full detection
+    and disk-not-full detection. (Zhihai Xu via jlowe)
+
   OPTIMIZATIONS
 
     YARN-3339. TestDockerContainerExecutor should pull a single image and not
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 93b81a6d10..1e102e5370 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1059,6 +1059,18 @@ private static void addDeprecatedKeys() {
   public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
       90.0F;
 
+  /**
+   * The low threshold percentage of disk space used when an offline disk is
+   * marked as online. Values can range from 0.0 to 100.0. The value shouldn't
+   * be more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE. If its value is
+   * more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE or not set, it will be
+   * set to the same value as NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE.
+   * This applies to nm-local-dirs and nm-log-dirs.
+   */
+  public static final String NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE =
+      NM_DISK_HEALTH_CHECK_PREFIX +
+      "disk-utilization-watermark-low-per-disk-percentage";
+
   /**
    * The minimum space that must be available on a local dir for it to be used.
    * This applies to nm-local-dirs and nm-log-dirs.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 29d26d1143..53793d2c47 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1317,6 +1317,17 @@
     <value>90.0</value>
   </property>
 
+  <property>
+    <description>The low threshold percentage of disk space used when a bad disk is
+    marked as good. Values can range from 0.0 to 100.0. This applies to
+    yarn-nodemanager.local-dirs and yarn.nodemanager.log-dirs.
+    Note that if its value is more than yarn.nodemanager.disk-health-checker.
+    max-disk-utilization-per-disk-percentage or not set, it will be set to the same value as
+    yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage.</description>
+    <name>yarn.nodemanager.disk-health-checker.disk-utilization-watermark-low-per-disk-percentage</name>
+    <value></value>
+  </property>
+
   <property>
     <description>The minimum space that must be available on a disk for it to be used.
     This applies to yarn-nodemanager.local-dirs and
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
index 32046c5fdf..a2bfd206c8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
@@ -39,6 +39,8 @@
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.util.DiskChecker;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Manages a list of local storage directories.
  */
@@ -88,8 +90,9 @@ static List<String> concat(List<String> l1, List<String> l2) {
   private List<String> fullDirs;
 
   private int numFailures;
-
-  private float diskUtilizationPercentageCutoff;
+
+  private float diskUtilizationPercentageCutoffHigh;
+  private float diskUtilizationPercentageCutoffLow;
   private long diskUtilizationSpaceCutoff;
 
   private int goodDirsDiskUtilizationPercentage;
@@ -103,7 +106,7 @@ static List<String> concat(List<String> l1, List<String> l2) {
    *          directories to be monitored
    */
   public DirectoryCollection(String[] dirs) {
-    this(dirs, 100.0F, 0);
+    this(dirs, 100.0F, 100.0F, 0);
   }
 
   /**
@@ -119,7 +122,7 @@ public DirectoryCollection(String[] dirs) {
    *
    */
   public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
-    this(dirs, utilizationPercentageCutOff, 0);
+    this(dirs, utilizationPercentageCutOff, utilizationPercentageCutOff, 0);
   }
 
   /**
@@ -134,7 +137,7 @@ public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
    *
    */
   public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
-    this(dirs, 100.0F, utilizationSpaceCutOff);
+    this(dirs, 100.0F, 100.0F, utilizationSpaceCutOff);
   }
 
   /**
@@ -145,25 +148,29 @@ public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
    *
    * @param dirs
    *          directories to be monitored
-   * @param utilizationPercentageCutOff
+   * @param utilizationPercentageCutOffHigh
    *          percentage of disk that can be used before the dir is taken out of
    *          the good dirs list
+   * @param utilizationPercentageCutOffLow
+   *          percentage of disk that can be used when the dir is moved from
+   *          the bad dirs list to the good dirs list
    * @param utilizationSpaceCutOff
    *          minimum space, in MB, that must be available on the disk for the
    *          dir to be marked as good
    *
    */
-  public DirectoryCollection(String[] dirs,
-      float utilizationPercentageCutOff,
+  public DirectoryCollection(String[] dirs,
+      float utilizationPercentageCutOffHigh,
+      float utilizationPercentageCutOffLow,
       long utilizationSpaceCutOff) {
     localDirs = new CopyOnWriteArrayList<String>(dirs);
     errorDirs = new CopyOnWriteArrayList<String>();
     fullDirs = new CopyOnWriteArrayList<String>();
-    diskUtilizationPercentageCutoff =
-        utilizationPercentageCutOff < 0.0F ? 0.0F
-            : (utilizationPercentageCutOff > 100.0F ? 100.0F
-                : utilizationPercentageCutOff);
+    diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
+        utilizationPercentageCutOffHigh));
+    diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
+        diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
     diskUtilizationSpaceCutoff =
         utilizationSpaceCutOff < 0 ? 0 : utilizationSpaceCutOff;
@@ -254,7 +261,8 @@ synchronized boolean checkDirs() {
     List<String> allLocalDirs =
         DirectoryCollection.concat(localDirs, failedDirs);
 
-    Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs);
+    Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs,
+        preCheckGoodDirs);
 
     localDirs.clear();
     errorDirs.clear();
@@ -314,7 +322,8 @@ synchronized boolean checkDirs() {
     return setChanged;
   }
 
-  Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
+  Map<String, DiskErrorInformation> testDirs(List<String> dirs,
+      Set<String> goodDirs) {
     HashMap<String, DiskErrorInformation> ret =
         new HashMap<String, DiskErrorInformation>();
     for (final String dir : dirs) {
@@ -322,7 +331,10 @@ Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
       try {
         File testDir = new File(dir);
         DiskChecker.checkDir(testDir);
-        if (isDiskUsageOverPercentageLimit(testDir)) {
+        float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
+            diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
+        if (isDiskUsageOverPercentageLimit(testDir,
+            diskUtilizationPercentageCutoff)) {
           msg =
               "used space above threshold of "
                   + diskUtilizationPercentageCutoff
@@ -374,7 +386,8 @@ private void verifyDirUsingMkdir(File dir) throws IOException {
     }
   }
 
-  private boolean isDiskUsageOverPercentageLimit(File dir) {
+  private boolean isDiskUsageOverPercentageLimit(File dir,
+      float diskUtilizationPercentageCutoff) {
     float freePercentage =
         100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
     float usedPercentage = 100.0F - freePercentage;
@@ -402,17 +415,24 @@ private void createDir(FileContext localFs, Path dir, FsPermission perm)
       }
     }
   }
-
-  public float getDiskUtilizationPercentageCutoff() {
-    return diskUtilizationPercentageCutoff;
+
+  @VisibleForTesting
+  float getDiskUtilizationPercentageCutoffHigh() {
+    return diskUtilizationPercentageCutoffHigh;
+  }
+
+  @VisibleForTesting
+  float getDiskUtilizationPercentageCutoffLow() {
+    return diskUtilizationPercentageCutoffLow;
   }
 
   public void setDiskUtilizationPercentageCutoff(
-      float diskUtilizationPercentageCutoff) {
-    this.diskUtilizationPercentageCutoff =
-        diskUtilizationPercentageCutoff < 0.0F ? 0.0F
-            : (diskUtilizationPercentageCutoff > 100.0F ? 100.0F
-                : diskUtilizationPercentageCutoff);
+      float utilizationPercentageCutOffHigh,
+      float utilizationPercentageCutOffLow) {
+    diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
+        utilizationPercentageCutOffHigh));
+    diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
+        diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
   }
 
   public long getDiskUtilizationSpaceCutoff() {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
index 769044ade5..5cc4e19287 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
@@ -114,22 +114,40 @@ public class LocalDirsHandlerService extends AbstractService {
   private final class MonitoringTimerTask extends TimerTask {
 
     public MonitoringTimerTask(Configuration conf) throws YarnRuntimeException {
-      float maxUsableSpacePercentagePerDisk =
+      float highUsableSpacePercentagePerDisk =
           conf.getFloat(
             YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
             YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE);
+      float lowUsableSpacePercentagePerDisk =
+          conf.getFloat(
+            YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE,
+            highUsableSpacePercentagePerDisk);
+      if (lowUsableSpacePercentagePerDisk > highUsableSpacePercentagePerDisk) {
+        LOG.warn("Using " + YarnConfiguration.
+            NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE + " as " +
+            YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
+            ", because " + YarnConfiguration.
+            NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
+            " is not configured properly.");
+        lowUsableSpacePercentagePerDisk = highUsableSpacePercentagePerDisk;
+      }
       long minFreeSpacePerDiskMB =
           conf.getLong(YarnConfiguration.NM_MIN_PER_DISK_FREE_SPACE_MB,
             YarnConfiguration.DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB);
       localDirs = new DirectoryCollection(
-          validatePaths(conf
-              .getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
-          maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
+          validatePaths(conf
+              .getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
+          highUsableSpacePercentagePerDisk,
+          lowUsableSpacePercentagePerDisk,
+          minFreeSpacePerDiskMB);
       logDirs = new DirectoryCollection(
-          validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
-          maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
+          validatePaths(conf
+              .getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
+          highUsableSpacePercentagePerDisk,
+          lowUsableSpacePercentagePerDisk,
+          minFreeSpacePerDiskMB);
 
       String local = conf.get(YarnConfiguration.NM_LOCAL_DIRS);
       conf.set(NM_GOOD_LOCAL_DIRS,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
index 2fd89c601d..e529628b71 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java
@@ -152,7 +152,7 @@ public void testDiskSpaceUtilizationLimit() throws IOException {
     // no good dirs
     Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
 
-    dc = new DirectoryCollection(dirs, 100.0F, 0);
+    dc = new DirectoryCollection(dirs, 100.0F, 100.0F, 0);
     utilizedSpacePerc =
         (int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
             testDir.getTotalSpace());
@@ -168,18 +168,28 @@ public void testDiskLimitsCutoffSetters() throws IOException {
 
     String[] dirs = { "dir" };
-    DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
+    DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 0.0F, 100);
     float testValue = 57.5F;
     float delta = 0.1F;
-    dc.setDiskUtilizationPercentageCutoff(testValue);
-    Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoff(),
-        delta);
+    dc.setDiskUtilizationPercentageCutoff(testValue, 50.0F);
+    Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(50.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
+
     testValue = -57.5F;
-    dc.setDiskUtilizationPercentageCutoff(testValue);
-    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
+
    testValue = 157.5F;
-    dc.setDiskUtilizationPercentageCutoff(testValue);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
 
     long spaceValue = 57;
     dc.setDiskUtilizationSpaceCutoff(spaceValue);
@@ -200,7 +210,7 @@ public void testFailedDisksBecomingGoodAgain() throws Exception {
     Assert.assertEquals(1, dc.getFailedDirs().size());
     Assert.assertEquals(1, dc.getFullDirs().size());
 
-    dc.setDiskUtilizationPercentageCutoff(100.0F);
+    dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
     dc.checkDirs();
     Assert.assertEquals(1, dc.getGoodDirs().size());
     Assert.assertEquals(0, dc.getFailedDirs().size());
@@ -236,27 +246,45 @@ public void testConstructors() {
     String[] dirs = { "dir" };
     float delta = 0.1F;
     DirectoryCollection dc = new DirectoryCollection(dirs);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
 
     dc = new DirectoryCollection(dirs, 57.5F);
-    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
 
     dc = new DirectoryCollection(dirs, 57);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoff());
 
-    dc = new DirectoryCollection(dirs, 57.5F, 67);
-    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc = new DirectoryCollection(dirs, 57.5F, 50.5F, 67);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(50.5F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoff());
 
-    dc = new DirectoryCollection(dirs, -57.5F, -67);
-    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc = new DirectoryCollection(dirs, -57.5F, -57.5F, -67);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
 
-    dc = new DirectoryCollection(dirs, 157.5F, -67);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc = new DirectoryCollection(dirs, 157.5F, 157.5F, -67);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
   }
 
@@ -288,7 +316,7 @@ public void testDirsChangeListener() {
     Assert.assertEquals(listener3.num, 1);
 
     dc.deregisterDirsChangeListener(listener2);
-    dc.setDiskUtilizationPercentageCutoff(100.0F);
+    dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
     dc.checkDirs();
     Assert.assertEquals(1, dc.getGoodDirs().size());
     Assert.assertEquals(listener1.num, 3);
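
For context on how the two thresholds above interact: a NodeManager directory is taken out of the good list once its utilization rises above yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage, and with this change it is only returned to the good list once utilization drops back to the new low watermark or below, so a disk hovering near a single cutoff no longer flaps between the full and good lists. Below is a minimal illustration of an operator-side configuration; the 90.0/75.0 values and the placement in yarn-site.xml are illustrative assumptions and are not part of this patch, only the property names come from the yarn-default.xml change above.

  <property>
    <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
    <value>90.0</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.disk-utilization-watermark-low-per-disk-percentage</name>
    <value>75.0</value>
  </property>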