diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index ef06299fcf..bbb1ed6f8a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -150,7 +150,11 @@ private static void addDeprecatedKeys() {
public static final String NM_LOG_CONTAINER_DEBUG_INFO =
YarnConfiguration.NM_PREFIX + "log-container-debug-info.enabled";
+ /**
+  * Configuration key that enables container debug-information logging only
+  * for containers whose exit code is non-zero. It is consulted in addition
+  * to {@link #NM_LOG_CONTAINER_DEBUG_INFO}; when that key is enabled, this
+  * one does not change the behavior.
+  */
+ public static final String NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR =
+ YarnConfiguration.NM_PREFIX + "log-container-debug-info-on-error.enabled";
+
public static final boolean DEFAULT_NM_LOG_CONTAINER_DEBUG_INFO = true;
+ /** Default for {@link #NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR}: disabled. */
+ public static final boolean DEFAULT_NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR = false;
////////////////////////////////
// IPC Configs
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 9697f7aa88..9fa600db4b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1656,6 +1656,21 @@
true
+ <property>
+   <description>Generate additional logs about container launches
+   if the container returned a non-zero exit code.
+   Currently, this creates a copy of the launch script and lists the
+   directory contents of the container work dir. When listing directory
+   contents, we follow symlinks to a max-depth of 5 (including symlinks
+   which point outside the container work dir), which may slow down
+   container launches.
+   This property has no effect if
+   yarn.nodemanager.log-container-debug-info.enabled is true.
+   </description>
+   <name>yarn.nodemanager.log-container-debug-info-on-error.enabled</name>
+   <value>false</value>
+ </property>
+
Amount of physical memory, in MB, that can be allocated
for containers. If set to -1 and
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
index 65e8183f69..3d0dca622c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
@@ -102,6 +102,7 @@ public abstract class ContainerExecutor implements Configurable {
private String[] whitelistVars;
private int exitCodeFileTimeout =
YarnConfiguration.DEFAULT_NM_CONTAINER_EXECUTOR_EXIT_FILE_TIMEOUT;
+ // Exit code most recently recorded by reacquireContainer(); read by
+ // shouldWriteDebugInformation() to evaluate the on-error debug-info
+ // setting. Holds the int default of 0 until it is first assigned.
+ // NOTE(review): this is one field per executor instance, not per
+ // container - confirm whether a single executor can serve several
+ // containers, in which case the value reflects only the latest one.
+ private int containerExitCode;
@Override
public void setConf(Configuration conf) {
@@ -303,7 +304,7 @@ public int reacquireContainer(ContainerReacquisitionContext ctx)
if (pidPath == null) {
LOG.warn("{} is not active, returning terminated error", containerId);
-
+ containerExitCode = ExitCode.TERMINATED.getExitCode();
return ExitCode.TERMINATED.getExitCode();
}
@@ -335,7 +336,7 @@ public int reacquireContainer(ContainerReacquisitionContext ctx)
while (!file.exists() && msecLeft >= 0) {
if (!isContainerActive(containerId)) {
LOG.info("{} was deactivated", containerId);
-
+ containerExitCode = ExitCode.TERMINATED.getExitCode();
return ExitCode.TERMINATED.getExitCode();
}
@@ -350,7 +351,9 @@ public int reacquireContainer(ContainerReacquisitionContext ctx)
}
try {
- return Integer.parseInt(FileUtils.readFileToString(file, StandardCharsets.UTF_8).trim());
+ containerExitCode = Integer.parseInt(
+ FileUtils.readFileToString(file, StandardCharsets.UTF_8).trim());
+ return containerExitCode;
} catch (NumberFormatException e) {
throw new IOException("Error parsing exit code from pid " + pid, e);
}
@@ -453,9 +456,7 @@ public void writeLaunchEnv(OutputStream out, Map environment,
}
// dump debugging information if configured
- if (getConf() != null &&
- getConf().getBoolean(YarnConfiguration.NM_LOG_CONTAINER_DEBUG_INFO,
- YarnConfiguration.DEFAULT_NM_LOG_CONTAINER_DEBUG_INFO)) {
+ if (shouldWriteDebugInformation(getConf())) {
sb.echo("Copying debugging information");
sb.copyDebugInformation(new Path(outFilename),
new Path(logDir, outFilename));
@@ -488,6 +489,18 @@ protected File[] readDirAsUser(String user, Path dir) {
return new File(dir.toString()).listFiles();
}
+ /**
+  * Decides whether the generated launch script should include the commands
+  * that dump container debugging information.
+  *
+  * @param config the executor configuration; may be {@code null}
+  * @return {@code true} if
+  *         {@link YarnConfiguration#NM_LOG_CONTAINER_DEBUG_INFO} is enabled,
+  *         or if {@link YarnConfiguration#NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR}
+  *         is enabled and the recorded container exit code is non-zero;
+  *         {@code false} otherwise, including when {@code config} is
+  *         {@code null}
+  */
+ private boolean shouldWriteDebugInformation(Configuration config) {
+   // Without a configuration there is nothing to consult.
+   if (config == null) {
+     return false;
+   }
+
+   // The unconditional switch wins; the on-error switch is not read then.
+   boolean alwaysLog = config.getBoolean(
+       YarnConfiguration.NM_LOG_CONTAINER_DEBUG_INFO,
+       YarnConfiguration.DEFAULT_NM_LOG_CONTAINER_DEBUG_INFO);
+   if (alwaysLog) {
+     return true;
+   }
+
+   boolean logOnError = config.getBoolean(
+       YarnConfiguration.NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR,
+       YarnConfiguration.DEFAULT_NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR);
+   return logOnError && containerExitCode != 0;
+ }
+
/**
* The container exit code.
*/
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
index bd135ff519..6971d34b9d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java
@@ -1844,6 +1844,63 @@ public void testDebuggingInformation() throws IOException {
}
}
+ /**
+  * Verifies that no debugging information is produced when
+  * {@link YarnConfiguration#NM_LOG_CONTAINER_DEBUG_INFO} is disabled,
+  * {@link YarnConfiguration#NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR} is enabled
+  * and the generated launch script exits with code zero: neither the
+  * directory-contents file nor a copy of the launch script may appear in
+  * the log directory.
+  *
+  * @throws IOException if a script file cannot be written or executed
+  */
+ @Test
+ public void testDebuggingInformationOnError() throws IOException {
+   File shellFile = null;
+   File tempFile = null;
+   Configuration conf = new YarnConfiguration();
+   try {
+     shellFile = Shell.appendScriptExtension(tmpDir, "hello");
+     tempFile = Shell.appendScriptExtension(tmpDir, "temp");
+     String testCommand = Shell.WINDOWS ? "@echo \"hello\"" : "echo \"hello\"";
+     // try-with-resources: the writer is closed even if a call below throws.
+     try (PrintWriter writer =
+         new PrintWriter(new FileOutputStream(shellFile))) {
+       FileUtil.setExecutable(shellFile, true);
+       writer.println(testCommand);
+     }
+     Map<Path, List<String>> resources = new HashMap<>();
+     Map<String, String> env = new HashMap<>();
+     List<String> commands = new ArrayList<>();
+     if (Shell.WINDOWS) {
+       commands.add("cmd");
+       commands.add("/c");
+       commands.add("\"" + shellFile.getAbsolutePath() + "\"");
+     } else {
+       commands.add("/bin/sh \\\"" + shellFile.getAbsolutePath() + "\\\"");
+     }
+     // Only the on-error switch is active for this test.
+     conf.setBoolean(YarnConfiguration.NM_LOG_CONTAINER_DEBUG_INFO, false);
+     conf.setBoolean(YarnConfiguration.NM_LOG_CONTAINER_DEBUG_INFO_ON_ERROR, true);
+     ContainerExecutor exec = new DefaultContainerExecutor();
+     exec.setConf(conf);
+     LinkedHashSet<String> nmVars = new LinkedHashSet<>();
+     // The launch script must be fully written and closed before it is run.
+     try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+       exec.writeLaunchEnv(fos, env, resources, commands,
+           new Path(localLogDir.getAbsolutePath()), "user",
+           tempFile.getName(), nmVars);
+       fos.flush();
+     }
+     FileUtil.setExecutable(tempFile, true);
+     Shell.ShellCommandExecutor shexc = new Shell.ShellCommandExecutor(
+         new String[]{tempFile.getAbsolutePath()}, tmpDir);
+     shexc.execute();
+     assertThat(shexc.getExitCode()).isZero();
+     File directoryInfo =
+         new File(localLogDir, ContainerExecutor.DIRECTORY_CONTENTS);
+     File scriptCopy = new File(localLogDir, tempFile.getName());
+     // The script succeeded, so no debug artifacts are expected.
+     Assert.assertFalse("Directory info file should not exist",
+         directoryInfo.exists());
+     Assert.assertFalse("Copy of launch script should not exist",
+         scriptCopy.exists());
+   } finally {
+     // cleanup
+     if (shellFile != null && shellFile.exists()) {
+       shellFile.delete();
+     }
+     if (tempFile != null && tempFile.exists()) {
+       tempFile.delete();
+     }
+   }
+ }
+
/**
* Test container launch fault.
* @throws Exception