YARN-8815. RM fails to recover finished unmanaged AM. Contributed by Bibin A Chundatt.

This commit is contained in:
Rohith Sharma K S 2018-09-25 11:31:14 +05:30
parent d0bc6a27fd
commit 50bc7746d7
3 changed files with 29 additions and 0 deletions

View File

@@ -306,6 +306,7 @@ public abstract class RMStateStore extends AbstractService {
context.setPriority(srcCtx.getPriority()); context.setPriority(srcCtx.getPriority());
context.setApplicationTags(srcCtx.getApplicationTags()); context.setApplicationTags(srcCtx.getApplicationTags());
context.setApplicationType(srcCtx.getApplicationType()); context.setApplicationType(srcCtx.getApplicationType());
context.setUnmanagedAM(srcCtx.getUnmanagedAM());
ContainerLaunchContextPBImpl amContainerSpec = ContainerLaunchContextPBImpl amContainerSpec =
new ContainerLaunchContextPBImpl(); new ContainerLaunchContextPBImpl();
amContainerSpec.setApplicationACLs( amContainerSpec.setApplicationACLs(

View File

@@ -599,6 +599,15 @@ public class MockRM extends ResourceManager {
true, false, false, null, 0, null, true, null); true, false, false, null, 0, null, true, null);
} }
/**
 * Convenience overload that submits an application by delegating to the
 * full-argument {@code submitApp} variant.
 *
 * <p>The attempt limit handed to the delegate is read from the configuration
 * under {@link YarnConfiguration#RM_AM_MAX_ATTEMPTS}, defaulting to
 * {@link YarnConfiguration#DEFAULT_RM_AM_MAX_ATTEMPTS}. Every remaining
 * trailing argument is a fixed value; what each position means is defined by
 * the delegate's signature, which is not repeated here.
 *
 * @param resource  forwarded unchanged to the delegate
 * @param name      forwarded unchanged to the delegate
 * @param user      forwarded unchanged to the delegate
 * @param acls      forwarded unchanged to the delegate
 * @param unManaged forwarded unchanged to the delegate
 * @param queue     forwarded unchanged to the delegate
 * @return whatever the delegate returns
 * @throws Exception if the delegate throws
 */
public RMApp submitApp(Resource resource, String name, String user,
    Map<ApplicationAccessType, String> acls, boolean unManaged, String queue)
    throws Exception {
  // Resolve the configured attempt limit first so the long delegate call
  // below stays readable.
  final int amMaxAttempts = super.getConfig().getInt(
      YarnConfiguration.RM_AM_MAX_ATTEMPTS,
      YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
  return submitApp(resource, name, user, acls, unManaged, queue,
      amMaxAttempts, null, null, true, false, false, null, 0, null, true,
      null);
}
public RMApp submitApp(int masterMemory, String name, String user, public RMApp submitApp(int masterMemory, String name, String user,
Map<ApplicationAccessType, String> acls, String queue, Map<ApplicationAccessType, String> acls, String queue,
boolean waitForAccepted) throws Exception { boolean waitForAccepted) throws Exception {

View File

@@ -456,6 +456,25 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase {
Assert.assertEquals(4, rmAppState.size()); Assert.assertEquals(4, rmAppState.size());
} }
/**
 * Checks that a second RM started on the first RM's state store ends up
 * with the application that was submitted as unmanaged and then killed on
 * the first RM.
 *
 * <p>Both RMs are stopped in a {@code finally} block so that a failed
 * assertion or a failed wait does not leave them running.
 *
 * @throws Exception if any RM operation fails
 */
@Test(timeout = 60000)
public void testUnManagedRMRestart() throws Exception {
  // Create the first RM and keep a handle on its state store so the second
  // RM can be started from the same stored state.
  MockRM rm1 = createMockRM(conf);
  MemoryRMStateStore memStore = (MemoryRMStateStore) rm1.getRMStateStore();
  rm1.start();
  MockRM rm2 = null;
  try {
    // Submit an app with the unManaged flag set, then kill it and wait
    // until it reports KILLED.
    RMApp app0 =
        rm1.submitApp(null, "name", "user", new HashMap<>(), true, "default");
    rm1.killApp(app0.getApplicationId());
    rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);

    // Start a new RM on the same state store.
    rm2 = createMockRM(conf, memStore);
    rm2.start();
    Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
  } finally {
    // Same stop order as before (rm1, then rm2), but now also executed when
    // the body above throws.
    rm1.stop();
    if (rm2 != null) {
      rm2.stop();
    }
  }
}
@Test (timeout = 60000) @Test (timeout = 60000)
public void testRMRestartAppRunningAMFailed() throws Exception { public void testRMRestartAppRunningAMFailed() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,