diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index aad620e075..8a1d076455 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -275,6 +275,8 @@ Release 2.0.5-beta - UNRELEASED YARN-547. Fixed race conditions in public and private resource localization which used to cause duplicate downloads. (Omkar Vinit Joshi via vinodkv) + YARN-594. Update test and add comments in YARN-534 (Jian He via bikas) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 8a92ab1023..4dcb6f2ab8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -340,6 +340,8 @@ public class RMAppManager implements EventHandler, } else { maxAppAttempts = individualMaxAppAttempts; } + // In work-preserving restart, if attemptCount == maxAttempts, the job + // must still be recovered because the last attempt may still be running. 
if(appState.getAttemptCount() >= maxAppAttempts) { LOG.info("Not recovering application " + appState.getAppId() + " due to recovering attempt is beyond maxAppAttempt limit"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index 6e75297be7..4ab8901054 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -364,7 +364,6 @@ public class TestRMRestart { Assert.assertNotNull(attemptState); Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId()); - rm1.stop(); // start new RM MockRM rm2 = new MockRM(conf, memStore); @@ -382,7 +381,12 @@ public class TestRMRestart { Assert.assertNull(rm2.getRMContext().getRMApps() .get(app1.getApplicationId())); - // stop the RM + // verify that app2 is stored, app1 is removed + Assert.assertNotNull(rmAppState.get(app2.getApplicationId())); + Assert.assertNull(rmAppState.get(app1.getApplicationId())); + + // stop the RM + rm1.stop(); rm2.stop(); } }