From 50bc7746d7593bf6adc92fc9807a99cf53b4aef3 Mon Sep 17 00:00:00 2001
From: Rohith Sharma K S
Date: Tue, 25 Sep 2018 11:31:14 +0530
Subject: [PATCH] YARN-8815. RM fails to recover finished unmanaged AM.
 Contributed by Bibin A Chundatt.

---
 .../recovery/RMStateStore.java                |  1 +
 .../yarn/server/resourcemanager/MockRM.java   |  9 +++++++++
 .../server/resourcemanager/TestRMRestart.java | 19 +++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index 242b5d05ed..a0b10b1196 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -306,6 +306,7 @@ private void pruneAppState(ApplicationStateData appState) {
       context.setPriority(srcCtx.getPriority());
       context.setApplicationTags(srcCtx.getApplicationTags());
       context.setApplicationType(srcCtx.getApplicationType());
+      context.setUnmanagedAM(srcCtx.getUnmanagedAM());
       ContainerLaunchContextPBImpl amContainerSpec =
           new ContainerLaunchContextPBImpl();
       amContainerSpec.setApplicationACLs(
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
index 2ad439189f..6a024f5b92 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
@@ -599,6 +599,15 @@ public RMApp submitApp(Resource resource, String name, String user,
         true, false, false, null, 0, null, true, null);
   }
 
+  public RMApp submitApp(Resource resource, String name, String user,
+      Map<ApplicationAccessType, String> acls, boolean unManaged, String queue)
+      throws Exception {
+    return submitApp(resource, name, user, acls, unManaged, queue,
+        super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+            YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null, null, true,
+        false, false, null, 0, null, true, null);
+  }
+
   public RMApp submitApp(int masterMemory, String name, String user,
       Map<ApplicationAccessType, String> acls, String queue,
       boolean waitForAccepted) throws Exception {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
index 9aa5c531ae..a8b8d89fc6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
@@ -456,6 +456,25 @@ public void testRMRestart() throws Exception {
     Assert.assertEquals(4, rmAppState.size());
   }
 
+  @Test(timeout = 60000)
+  public void testUnManagedRMRestart() throws Exception {
+    // Create RM
+    MockRM rm1 = createMockRM(conf);
+    MemoryRMStateStore memStore = (MemoryRMStateStore) rm1.getRMStateStore();
+    rm1.start();
+    // create app and launch the AM
+    RMApp app0 =
+        rm1.submitApp(null, "name", "user", new HashMap<>(), true, "default");
+    rm1.killApp(app0.getApplicationId());
+    rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);
+    // start new RM
+    MockRM rm2 = createMockRM(conf, memStore);
+    rm2.start();
+    Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
+    rm1.stop();
+    rm2.stop();
+  }
+
   @Test (timeout = 60000)
   public void testRMRestartAppRunningAMFailed() throws Exception {
     conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,