From 62e9348bc10bb97a5fcb4281f7996a09d8e69c60 Mon Sep 17 00:00:00 2001 From: Junping Du Date: Thu, 3 Dec 2015 06:36:37 -0800 Subject: [PATCH] YARN-4408. Fix issue that NodeManager still reports negative running containers. Contributed by Robert Kanter. --- hadoop-yarn-project/CHANGES.txt | 3 + .../container/ContainerImpl.java | 7 ++- .../container/TestContainer.java | 59 ++++++++++++++++++- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 748a84116f..7258b364ad 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1084,6 +1084,9 @@ Release 2.8.0 - UNRELEASED YARN-4384. updateNodeResource CLI should not accept negative values for resource. (Junping Du via wangda) + YARN-4408. Fix issue that NodeManager reports negative running containers. + (Robert Kanter via junping_du) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index eff2188c93..e16ea93bcd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -1041,7 +1041,12 @@ static class ExitedWithSuccessToDoneTransition extends ContainerDoneTransition { @Override public void transition(ContainerImpl container, ContainerEvent event) { - container.metrics.endRunningContainer(); + if (container.wasLaunched) { + container.metrics.endRunningContainer(); + } else { + LOG.warn("Container exited with success despite being killed and not" + + "actually running"); + } container.metrics.completedContainer(); NMAuditLogger.logSuccess(container.user, AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 2834e30247..2ab9842b29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -397,7 +397,8 @@ public void testKillOnLocalizationFailed() throws Exception { } @Test - public void testKillOnLocalizedWhenContainerNotLaunched() throws Exception { + public void testKillOnLocalizedWhenContainerNotLaunchedContainerKilled() + throws Exception { WrappedContainer wc = null; try { wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); @@ -426,6 +427,62 @@ public void testKillOnLocalizedWhenContainerNotLaunched() throws Exception { } } + @Test + public void testKillOnLocalizedWhenContainerNotLaunchedContainerSuccess() + throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + wc.containerSuccessful(); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.EXITED_WITH_SUCCESS, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + wc.c.handle(new ContainerEvent(wc.c.getContainerId(), + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(0, metrics.getRunningContainers()); + } finally { + if (wc != null) { + wc.finished(); + } + } + } + + @Test + public void testKillOnLocalizedWhenContainerNotLaunchedContainerFailure() + throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + wc.containerFailed(ExitCode.FORCE_KILLED.getExitCode()); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + wc.c.handle(new ContainerEvent(wc.c.getContainerId(), + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + assertEquals(0, metrics.getRunningContainers()); + } finally { + if (wc != null) { + wc.finished(); + } + } + } + @Test public void testKillOnLocalizedWhenContainerLaunched() throws Exception { WrappedContainer wc = null;