diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java index 7c46c5b7ec..f2a5b242a8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NodeHealthScriptRunner.java @@ -163,6 +163,7 @@ public class NodeHealthScriptRunner extends AbstractService { setHealthStatus(false, exceptionStackTrace); break; case FAILED_WITH_EXIT_CODE: + // see Javadoc above - we don't report bad health intentionally setHealthStatus(true, "", now); break; case FAILED: diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java index 8fc64d10a2..2748c0b581 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNodeHealthScriptRunner.java @@ -94,6 +94,8 @@ public class TestNodeHealthScriptRunner { String timeOutScript = Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\"" : "sleep 4\necho \"I am fine\""; + String exitCodeScript = "exit 127"; + Configuration conf = new Configuration(); writeNodeHealthScriptFile(normalScript, true); NodeHealthScriptRunner nodeHealthScriptRunner = new NodeHealthScriptRunner( @@ -132,5 +134,12 @@ public class TestNodeHealthScriptRunner { Assert.assertEquals( NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG, nodeHealthScriptRunner.getHealthReport()); + + // Exit code 127 + writeNodeHealthScriptFile(exitCodeScript, true); + timerTask.run(); + Assert.assertTrue("Node health status reported unhealthy", + nodeHealthScriptRunner.isHealthy()); + Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md index 5b9f0ef820..e4ed57f5cb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManager.md @@ -44,7 +44,13 @@ The following configuration parameters can be used to modify the disk checks: ###External Health Script -Users may specify their own health checker script that will be invoked by the health checker service. Users may specify a timeout as well as options to be passed to the script. If the script exits with a non-zero exit code, times out or results in an exception being thrown, the node is marked as unhealthy. Please note that if the script cannot be executed due to permissions or an incorrect path, etc, then it counts as a failure and the node will be reported as unhealthy. Please note that speifying a health check script is not mandatory. If no script is specified, only the disk checker status will be used to determine the health of the node. +Users may specify their own health checker script that will be invoked by the health checker service. Users may specify a timeout as well as options to be passed to the script. If the script times out, results in an exception being thrown or outputs a line which begins with the string ERROR, the node is marked as unhealthy. Please note that: + + * Exit code other than 0 is **not** considered to be a failure because it might have been caused by a syntax error. Therefore the node will **not** be marked as unhealthy. + + * If the script cannot be executed due to permissions or an incorrect path, etc, then it counts as a failure and the node will be reported as unhealthy. + + * Specifying a health check script is not mandatory. If no script is specified, only the disk checker status will be used to determine the health of the node. The following configuration parameters can be used to set the health script: