MAPREDUCE-3485. DISKS_FAILED -101 error code should be defined in same location as ABORTED_CONTAINER_EXIT_STATUS. (Ravi Gummadi via mahadev)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1210192 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1972a76e5a
commit
e948247715
@ -236,6 +236,9 @@ Release 0.23.1 - Unreleased
|
|||||||
MAPREDUCE-3458. Fix findbugs warnings in hadoop-examples. (Devaraj K
|
MAPREDUCE-3458. Fix findbugs warnings in hadoop-examples. (Devaraj K
|
||||||
via mahadev)
|
via mahadev)
|
||||||
|
|
||||||
|
MAPREDUCE-3485. DISKS_FAILED -101 error code should be defined in same location as
|
||||||
|
ABORTED_CONTAINER_EXIT_STATUS. (Ravi Gummadi via mahadev)
|
||||||
|
|
||||||
Release 0.23.0 - 2011-11-01
|
Release 0.23.0 - 2011-11-01
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -73,6 +73,11 @@ public interface ContainerStatus {
|
|||||||
* <p>Container killed by the framework, either due to being released by
|
* <p>Container killed by the framework, either due to being released by
|
||||||
* the application or being 'lost' due to node failures etc. have a special
|
* the application or being 'lost' due to node failures etc. have a special
|
||||||
* exit code of {@literal -100}.</p>
|
* exit code of {@literal -100}.</p>
|
||||||
|
*
|
||||||
|
* <p>When a threshold number of the nodemanager-local-directories or
|
||||||
|
* a threshold number of the nodemanager-log-directories become bad, the
|
||||||
|
* container is not launched and exits with an exit status of
|
||||||
|
* {@literal -101}.</p>
|
||||||
*
|
*
|
||||||
* @return <em>exit status</em> for the container
|
* @return <em>exit status</em> for the container
|
||||||
*/
|
*/
|
||||||
|
@ -431,6 +431,7 @@ public class YarnConfiguration extends Configuration {
|
|||||||
|
|
||||||
public static final int INVALID_CONTAINER_EXIT_STATUS = -1000;
|
public static final int INVALID_CONTAINER_EXIT_STATUS = -1000;
|
||||||
public static final int ABORTED_CONTAINER_EXIT_STATUS = -100;
|
public static final int ABORTED_CONTAINER_EXIT_STATUS = -100;
|
||||||
|
public static final int DISKS_FAILED = -101;
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Web Proxy Configs
|
// Web Proxy Configs
|
||||||
|
@ -122,8 +122,7 @@ public abstract void deleteAsUser(String user, Path subDir, Path... basedirs)
|
|||||||
|
|
||||||
public enum ExitCode {
|
public enum ExitCode {
|
||||||
FORCE_KILLED(137),
|
FORCE_KILLED(137),
|
||||||
TERMINATED(143),
|
TERMINATED(143);
|
||||||
DISKS_FAILED(-101);
|
|
||||||
private final int code;
|
private final int code;
|
||||||
|
|
||||||
private ExitCode(int exitCode) {
|
private ExitCode(int exitCode) {
|
||||||
|
@ -181,7 +181,7 @@ public Integer call() {
|
|||||||
List<String> logDirs = dirsHandler.getLogDirs();
|
List<String> logDirs = dirsHandler.getLogDirs();
|
||||||
|
|
||||||
if (!dirsHandler.areDisksHealthy()) {
|
if (!dirsHandler.areDisksHealthy()) {
|
||||||
ret = ExitCode.DISKS_FAILED.getExitCode();
|
ret = YarnConfiguration.DISKS_FAILED;
|
||||||
throw new IOException("Most of the disks failed. "
|
throw new IOException("Most of the disks failed. "
|
||||||
+ dirsHandler.getDisksHealthReport());
|
+ dirsHandler.getDisksHealthReport());
|
||||||
}
|
}
|
||||||
|
@ -403,9 +403,10 @@ Hadoop MapReduce Next Generation - Cluster Setup
|
|||||||
the health of the local disks (specifically checks nodemanager-local-dirs
|
the health of the local disks (specifically checks nodemanager-local-dirs
|
||||||
and nodemanager-log-dirs) and after reaching the threshold of number of
|
and nodemanager-log-dirs) and after reaching the threshold of number of
|
||||||
bad directories based on the value set for the config property
|
bad directories based on the value set for the config property
|
||||||
yarn.nodemanager.disk-health-checker.min-healthy-disks. The boot disk is
|
yarn.nodemanager.disk-health-checker.min-healthy-disks, the whole node is
|
||||||
either raided or a failure in the boot disk is identified by the health
|
marked unhealthy and this info is also sent to the resource manager. The boot
|
||||||
checker script.
|
disk is either raided or a failure in the boot disk is identified by the
|
||||||
|
health checker script.
|
||||||
|
|
||||||
* {Slaves file}
|
* {Slaves file}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user