YARN-1903. Set exit code and diagnostics when container is killed at NEW/LOCALIZING state. Contributed by Zhijie Shen

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1586522 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jian He 2014-04-11 01:26:36 +00:00
parent 3b008baf35
commit ed78328d50
4 changed files with 63 additions and 3 deletions

View File

@ -104,6 +104,9 @@ Release 2.4.1 - UNRELEASED
verification of public cache files in Windows+local file-system environment.
(Varun Vasudev via vinodkv)
YARN-1903. Set exit code and diagnostics when container is killed at
NEW/LOCALIZING state. (Zhijie Shen via jianhe)
Release 2.4.0 - 2014-04-07
INCOMPATIBLE CHANGES

View File

@ -343,9 +343,10 @@ private void testContainerManagement(NMClientImpl nmClient,
// getContainerStatus can be called after stopContainer
try {
// O is possible if CLEANUP_CONTAINER is executed too late
// 137 is possible if the container is not terminated but killed
testGetContainerStatus(container, i, ContainerState.COMPLETE,
"Container killed by the ApplicationMaster.", Arrays.asList(
new Integer[] {143, 0}));
new Integer[] {137, 143, 0}));
} catch (YarnException e) {
// The exception is possible because, after the container is stopped,
// it may be removed from NM's context.

View File

@ -47,6 +47,7 @@
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent;
@ -141,7 +142,7 @@ ContainerEventType.INIT_CONTAINER, new RequestResourcesTransition())
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
UPDATE_DIAGNOSTICS_TRANSITION)
.addTransition(ContainerState.NEW, ContainerState.DONE,
ContainerEventType.KILL_CONTAINER, CONTAINER_DONE_TRANSITION)
ContainerEventType.KILL_CONTAINER, new KillOnNewTransition())
// From LOCALIZING State
.addTransition(ContainerState.LOCALIZING,
@ -760,7 +761,9 @@ public void transition(ContainerImpl container, ContainerEvent event) {
container.cleanup();
container.metrics.endInitingContainer();
ContainerKillEvent killEvent = (ContainerKillEvent) event;
container.exitCode = ExitCode.TERMINATED.getExitCode();
container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
container.diagnostics.append("Container is killed before being launched.\n");
}
}
@ -828,7 +831,6 @@ public void transition(ContainerImpl container, ContainerEvent event) {
/**
* Handle the following transitions:
* - NEW -> DONE upon KILL_CONTAINER
* - {LOCALIZATION_FAILED, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE,
* KILLING, CONTAINER_CLEANEDUP_AFTER_KILL}
* -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
@ -849,6 +851,21 @@ public void transition(ContainerImpl container, ContainerEvent event) {
}
}
/**
* Handle the following transition:
* - NEW -> DONE upon KILL_CONTAINER
*/
static class KillOnNewTransition extends ContainerDoneTransition {
@Override
public void transition(ContainerImpl container, ContainerEvent event) {
ContainerKillEvent killEvent = (ContainerKillEvent) event;
container.exitCode = ExitCode.TERMINATED.getExitCode();
container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
container.diagnostics.append("Container is killed before being launched.\n");
super.transition(container, event);
}
}
/**
* Update diagnostics, staying in the same state.
*/

View File

@ -311,6 +311,45 @@ public void testCleanupOnKillRequest() throws Exception {
}
}
@Test
public void testKillOnNew() throws Exception {
WrappedContainer wc = null;
try {
wc = new WrappedContainer(13, 314159265358979L, 4344, "yak");
assertEquals(ContainerState.NEW, wc.c.getContainerState());
wc.killContainer();
assertEquals(ContainerState.DONE, wc.c.getContainerState());
assertEquals(ExitCode.TERMINATED.getExitCode(),
wc.c.cloneAndGetContainerStatus().getExitStatus());
assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
.contains("KillRequest"));
} finally {
if (wc != null) {
wc.finished();
}
}
}
@Test
public void testKillOnLocalizing() throws Exception {
WrappedContainer wc = null;
try {
wc = new WrappedContainer(14, 314159265358979L, 4344, "yak");
wc.initContainer();
assertEquals(ContainerState.LOCALIZING, wc.c.getContainerState());
wc.killContainer();
assertEquals(ContainerState.KILLING, wc.c.getContainerState());
assertEquals(ExitCode.TERMINATED.getExitCode(),
wc.c.cloneAndGetContainerStatus().getExitStatus());
assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
.contains("KillRequest"));
} finally {
if (wc != null) {
wc.finished();
}
}
}
@Test
public void testKillOnLocalizationFailed() throws Exception {
WrappedContainer wc = null;