YARN-1903. Set exit code and diagnostics when container is killed at NEW/LOCALIZING state. Contributed by Zhijie Shen
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1586522 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3b008baf35
commit
ed78328d50
@ -104,6 +104,9 @@ Release 2.4.1 - UNRELEASED
|
||||
verification of public cache files in Windows+local file-system environment.
|
||||
(Varun Vasudev via vinodkv)
|
||||
|
||||
YARN-1903. Set exit code and diagnostics when container is killed at
|
||||
NEW/LOCALIZING state. (Zhijie Shen via jianhe)
|
||||
|
||||
Release 2.4.0 - 2014-04-07
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -343,9 +343,10 @@ private void testContainerManagement(NMClientImpl nmClient,
|
||||
// getContainerStatus can be called after stopContainer
|
||||
try {
|
||||
// 0 is possible if CLEANUP_CONTAINER is executed too late
|
||||
// 137 is possible if the container is not terminated but killed
|
||||
testGetContainerStatus(container, i, ContainerState.COMPLETE,
|
||||
"Container killed by the ApplicationMaster.", Arrays.asList(
|
||||
new Integer[] {143, 0}));
|
||||
new Integer[] {137, 143, 0}));
|
||||
} catch (YarnException e) {
|
||||
// The exception is possible because, after the container is stopped,
|
||||
// it may be removed from NM's context.
|
||||
|
@ -47,6 +47,7 @@
|
||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent;
|
||||
@ -141,7 +142,7 @@ ContainerEventType.INIT_CONTAINER, new RequestResourcesTransition())
|
||||
ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
|
||||
UPDATE_DIAGNOSTICS_TRANSITION)
|
||||
.addTransition(ContainerState.NEW, ContainerState.DONE,
|
||||
ContainerEventType.KILL_CONTAINER, CONTAINER_DONE_TRANSITION)
|
||||
ContainerEventType.KILL_CONTAINER, new KillOnNewTransition())
|
||||
|
||||
// From LOCALIZING State
|
||||
.addTransition(ContainerState.LOCALIZING,
|
||||
@ -760,7 +761,9 @@ public void transition(ContainerImpl container, ContainerEvent event) {
|
||||
container.cleanup();
|
||||
container.metrics.endInitingContainer();
|
||||
ContainerKillEvent killEvent = (ContainerKillEvent) event;
|
||||
container.exitCode = ExitCode.TERMINATED.getExitCode();
|
||||
container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
|
||||
container.diagnostics.append("Container is killed before being launched.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@ -828,7 +831,6 @@ public void transition(ContainerImpl container, ContainerEvent event) {
|
||||
|
||||
/**
|
||||
* Handle the following transitions:
|
||||
* - NEW -> DONE upon KILL_CONTAINER
|
||||
* - {LOCALIZATION_FAILED, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE,
|
||||
* KILLING, CONTAINER_CLEANEDUP_AFTER_KILL}
|
||||
* -> DONE upon CONTAINER_RESOURCES_CLEANEDUP
|
||||
@ -849,6 +851,21 @@ public void transition(ContainerImpl container, ContainerEvent event) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the following transition:
|
||||
* - NEW -> DONE upon KILL_CONTAINER
|
||||
*/
|
||||
static class KillOnNewTransition extends ContainerDoneTransition {
|
||||
@Override
|
||||
// Records the kill outcome on the container, then delegates to the
// inherited ContainerDoneTransition logic.
public void transition(ContainerImpl container, ContainerEvent event) {
|
||||
// The event is cast to ContainerKillEvent to read its diagnostic text.
ContainerKillEvent killEvent = (ContainerKillEvent) event;
|
||||
// Store the TERMINATED exit code on the container.
container.exitCode = ExitCode.TERMINATED.getExitCode();
|
||||
// Append the diagnostic carried by the kill event, one message per line.
container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
|
||||
// Append a fixed note that the container was killed before launch.
container.diagnostics.append("Container is killed before being launched.\n");
|
||||
// Exit code and diagnostics are set before the parent transition runs.
super.transition(container, event);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update diagnostics, staying in the same state.
|
||||
*/
|
||||
|
@ -311,6 +311,45 @@ public void testCleanupOnKillRequest() throws Exception {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that killing a container that is still in the NEW state moves it
 * to DONE, and that the resulting container status carries the TERMINATED
 * exit code and diagnostics containing "KillRequest".
 */
@Test
|
||||
public void testKillOnNew() throws Exception {
|
||||
WrappedContainer wc = null;
|
||||
try {
|
||||
wc = new WrappedContainer(13, 314159265358979L, 4344, "yak");
|
||||
// No init event has been sent yet, so the container is expected to be NEW.
assertEquals(ContainerState.NEW, wc.c.getContainerState());
|
||||
wc.killContainer();
|
||||
// The kill is expected to take the container from NEW to DONE.
assertEquals(ContainerState.DONE, wc.c.getContainerState());
|
||||
assertEquals(ExitCode.TERMINATED.getExitCode(),
|
||||
wc.c.cloneAndGetContainerStatus().getExitStatus());
|
||||
// NOTE(review): "KillRequest" is presumably the diagnostic text sent by
// wc.killContainer() -- confirm against WrappedContainer.
assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
|
||||
.contains("KillRequest"));
|
||||
} finally {
|
||||
// Call finished() on the wrapper whenever it was constructed.
if (wc != null) {
|
||||
wc.finished();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that killing a container in the LOCALIZING state moves it to
 * KILLING, and that the container status already carries the TERMINATED
 * exit code and diagnostics containing "KillRequest".
 */
@Test
|
||||
public void testKillOnLocalizing() throws Exception {
|
||||
WrappedContainer wc = null;
|
||||
try {
|
||||
wc = new WrappedContainer(14, 314159265358979L, 4344, "yak");
|
||||
// After initContainer() the container is expected to be LOCALIZING.
wc.initContainer();
|
||||
assertEquals(ContainerState.LOCALIZING, wc.c.getContainerState());
|
||||
wc.killContainer();
|
||||
// Unlike the NEW case, the kill is expected to lead to KILLING, not DONE.
assertEquals(ContainerState.KILLING, wc.c.getContainerState());
|
||||
assertEquals(ExitCode.TERMINATED.getExitCode(),
|
||||
wc.c.cloneAndGetContainerStatus().getExitStatus());
|
||||
// NOTE(review): "KillRequest" is presumably the diagnostic text sent by
// wc.killContainer() -- confirm against WrappedContainer.
assertTrue(wc.c.cloneAndGetContainerStatus().getDiagnostics()
|
||||
.contains("KillRequest"));
|
||||
} finally {
|
||||
// Call finished() on the wrapper whenever it was constructed.
if (wc != null) {
|
||||
wc.finished();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKillOnLocalizationFailed() throws Exception {
|
||||
WrappedContainer wc = null;
|
||||
|
Loading…
Reference in New Issue
Block a user