From 0b77262890d76b0a3a35fa64befc8a406bc70b27 Mon Sep 17 00:00:00 2001 From: Yufei Gu Date: Fri, 23 Jun 2017 13:26:03 -0700 Subject: [PATCH] YARN-5876. TestResourceTrackerService#testGracefulDecommissionWithApp fails intermittently on trunk. (Robert Kanter via Yufei Gu) --- .../yarn/server/resourcemanager/MockRM.java | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 2ff4fb2a9a..23009db11c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -120,7 +120,7 @@ public class MockRM extends ResourceManager { private static final int SECOND = 1000; private static final int TIMEOUT_MS_FOR_ATTEMPT = 40 * SECOND; private static final int TIMEOUT_MS_FOR_APP_REMOVED = 40 * SECOND; - private static final int TIMEOUT_MS_FOR_CONTAINER_AND_NODE = 10 * SECOND; + private static final int TIMEOUT_MS_FOR_CONTAINER_AND_NODE = 20 * SECOND; private static final int WAIT_MS_PER_LOOP = 10; private final boolean useNullRMNodeLabelsManager; @@ -853,9 +853,17 @@ public void sendNodeLost(MockNM nm) throws Exception { drainEventsImplicitly(); } + private RMNode getRMNode(NodeId nodeId) { + RMNode node = getRMContext().getRMNodes().get(nodeId); + if (node == null) { + node = getRMContext().getInactiveRMNodes().get(nodeId); + } + return node; + } + /** * Wait until a node has reached a specified state. - * The timeout is 10 seconds. + * The timeout is 20 seconds. * @param nodeId the id of a node * @param finalState the node state waited * @throws InterruptedException @@ -864,12 +872,17 @@ public void sendNodeLost(MockNM nm) throws Exception { public void waitForState(NodeId nodeId, NodeState finalState) throws InterruptedException { drainEventsImplicitly(); - RMNode node = getRMContext().getRMNodes().get(nodeId); - if (node == null) { - node = getRMContext().getInactiveRMNodes().get(nodeId); - } - Assert.assertNotNull("node shouldn't be null", node); int timeWaiting = 0; + RMNode node = getRMNode(nodeId); + while (node == null) { + if (timeWaiting >= TIMEOUT_MS_FOR_CONTAINER_AND_NODE) { + break; + } + node = getRMNode(nodeId); + Thread.sleep(WAIT_MS_PER_LOOP); + timeWaiting += WAIT_MS_PER_LOOP; + } + Assert.assertNotNull("node shouldn't be null (timedout)", node); while (!finalState.equals(node.getState())) { if (timeWaiting >= TIMEOUT_MS_FOR_CONTAINER_AND_NODE) { break;