From 28bd63e92b2ef6717cd3dcbe04ac9d9ca5420cf4 Mon Sep 17 00:00:00 2001 From: Rohith Sharma K S Date: Wed, 25 May 2016 10:15:50 +0530 Subject: [PATCH] YARN-5024. TestContainerResourceUsage#testUsageAfterAMRestartWithMultipleContainers random failure. Contributed by Bibin A Chundatt --- .../TestContainerResourceUsage.java | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java index 74c70d881e..430fc0f265 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java @@ -309,7 +309,7 @@ private void amRestartTests(boolean keepRunningContainers) nm.nodeHeartbeat(am0.getApplicationAttemptId(), amContainerId.getContainerId(), ContainerState.COMPLETE); rm.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED); - + rm.drainEvents(); long memorySeconds = 0; long vcoreSeconds = 0; @@ -331,7 +331,8 @@ private void amRestartTests(boolean keepRunningContainers) } else { // If keepRunningContainers is false, all live containers should now // be completed. Calculate the resource usage metrics for all of them. - for (RMContainer c : rmContainers) { + for (RMContainer c : rmContainers) { + waitforContainerCompletion(rm, nm, amContainerId, c); AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); memorySeconds += ru.getMemorySeconds(); vcoreSeconds += ru.getVcoreSeconds(); @@ -346,13 +347,11 @@ private void amRestartTests(boolean keepRunningContainers) Assert.assertFalse(attempt2.getAppAttemptId() .equals(am0.getApplicationAttemptId())); - // launch the new AM - //TODO explore a better way than sleeping for a while (YARN-4929) - Thread.sleep(1000); + rm.waitForState(attempt2.getAppAttemptId(), RMAppAttemptState.SCHEDULED); nm.nodeHeartbeat(true); MockAM am1 = rm.sendAMLaunched(attempt2.getAppAttemptId()); am1.registerAppAttempt(); - + rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.RUNNING); // allocate NUM_CONTAINERS containers am1.allocate("127.0.0.1", 1024, NUM_CONTAINERS, new ArrayList()); @@ -385,6 +384,7 @@ private void amRestartTests(boolean keepRunningContainers) // Calculate container usage metrics for second attempt. for (RMContainer c : rmContainers) { + waitforContainerCompletion(rm, nm, amContainerId, c); AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); memorySeconds += ru.getMemorySeconds(); vcoreSeconds += ru.getVcoreSeconds(); @@ -401,6 +401,20 @@ private void amRestartTests(boolean keepRunningContainers) return; } + private void waitforContainerCompletion(MockRM rm, MockNM nm, + ContainerId amContainerId, RMContainer container) throws Exception { + ContainerId containerId = container.getContainerId(); + if (null != rm.scheduler.getRMContainer(containerId)) { + if (containerId.equals(amContainerId)) { + rm.waitForState(nm, containerId, RMContainerState.COMPLETED); + } else { + rm.waitForState(nm, containerId, RMContainerState.KILLED); + } + } else { + rm.drainEvents(); + } + } + private AggregateAppResourceUsage calculateContainerResourceMetrics( RMContainer rmContainer) { Resource resource = rmContainer.getContainer().getResource();