From e948247715ba001b00eafc5f801fa926c409ea5a Mon Sep 17 00:00:00 2001 From: Mahadev Konar Date: Sun, 4 Dec 2011 20:11:08 +0000 Subject: [PATCH] MAPREDUCE-3485. DISKS_FAILED -101 error code should be defined in same location as ABORTED_CONTAINER_EXIT_STATUS. (Ravi Gummadi via mahadev) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1210192 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../apache/hadoop/yarn/api/records/ContainerStatus.java | 5 +++++ .../org/apache/hadoop/yarn/conf/YarnConfiguration.java | 1 + .../hadoop/yarn/server/nodemanager/ContainerExecutor.java | 3 +-- .../containermanager/launcher/ContainerLaunch.java | 2 +- .../hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm | 7 ++++--- 6 files changed, 15 insertions(+), 6 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 305742eeb8..5aef1ec2b2 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -236,6 +236,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3458. Fix findbugs warnings in hadoop-examples. (Devaraj K via mahadev) + MAPREDUCE-3485. DISKS_FAILED -101 error code should be defined in same location as + ABORTED_CONTAINER_EXIT_STATUS. (Ravi Gummadi via mahadev) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java index e2dfc82c4b..1ef35ac289 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java @@ -73,6 +73,11 @@ public interface ContainerStatus { *

Container killed by the framework, either due to being released by * the application or being 'lost' due to node failures etc. have a special * exit code of {@literal -100}.

+ * + *

When the threshold number of nodemanager-local-directories or the + * threshold number of nodemanager-log-directories become bad, the + * container is not launched and exits with an exit status of + * {@literal -101}.

* * @return exit status for the container */ diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index d4b8f9fc56..fa4ffa1656 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -431,6 +431,7 @@ public class YarnConfiguration extends Configuration { public static final int INVALID_CONTAINER_EXIT_STATUS = -1000; public static final int ABORTED_CONTAINER_EXIT_STATUS = -100; + public static final int DISKS_FAILED = -101; //////////////////////////////// // Web Proxy Configs diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index e6a47da89c..9cffde1a65 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -122,8 +122,7 @@ public abstract void deleteAsUser(String user, Path subDir, Path... 
basedirs) public enum ExitCode { FORCE_KILLED(137), - TERMINATED(143), - DISKS_FAILED(-101); + TERMINATED(143); private final int code; private ExitCode(int exitCode) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 15de5d2749..821d4a042b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -181,7 +181,7 @@ public Integer call() { List logDirs = dirsHandler.getLogDirs(); if (!dirsHandler.areDisksHealthy()) { - ret = ExitCode.DISKS_FAILED.getExitCode(); + ret = YarnConfiguration.DISKS_FAILED; throw new IOException("Most of the disks failed. 
" + dirsHandler.getDisksHealthReport()); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm index 079c54b48b..98e19b2c51 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm @@ -403,9 +403,10 @@ Hadoop MapReduce Next Generation - Cluster Setup the health of the local disks (specifically checks nodemanager-local-dirs and nodemanager-log-dirs) and after reaching the threshold of number of bad directories based on the value set for the config property - yarn.nodemanager.disk-health-checker.min-healthy-disks. The boot disk is - either raided or a failure in the boot disk is identified by the health - checker script. + yarn.nodemanager.disk-health-checker.min-healthy-disks, the whole node is + marked unhealthy and this info is sent to resource manager also. The boot + disk is either raided or a failure in the boot disk is identified by the + health checker script. * {Slaves file}