YARN-1405. Fixed ResourceManager to not hang when init/start fails with an exception w.r.t state-store. Contributed by Jian He.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1548992 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-12-08 04:30:32 +00:00
parent 305ae48136
commit 48fb53bc49
3 changed files with 29 additions and 9 deletions

View File

@ -219,6 +219,9 @@ Release 2.4.0 - UNRELEASED
YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its
dependency on distributed-shell. (Binglin Chang via vinodkv) dependency on distributed-shell. (Binglin Chang via vinodkv)
YARN-1405. Fixed ResourceManager to not hang when init/start fails with an
exception w.r.t state-store. (Jian He via vinodkv)
Release 2.3.0 - UNRELEASED Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -362,7 +362,7 @@ protected void serviceInit(Configuration configuration) throws Exception {
// the Exception from stateStore.init() needs to be handled for // the Exception from stateStore.init() needs to be handled for
// HA and we need to give up master status if we got fenced // HA and we need to give up master status if we got fenced
LOG.error("Failed to init state store", e); LOG.error("Failed to init state store", e);
ExitUtil.terminate(1, e); throw e;
} }
rmContext.setStateStore(rmStore); rmContext.setStateStore(rmStore);
@ -470,7 +470,7 @@ protected void serviceStart() throws Exception {
// the Exception from loadState() needs to be handled for // the Exception from loadState() needs to be handled for
// HA and we need to give up master status if we got fenced // HA and we need to give up master status if we got fenced
LOG.error("Failed to load/recover state", e); LOG.error("Failed to load/recover state", e);
ExitUtil.terminate(1, e); throw e;
} }
} }

View File

@ -34,7 +34,6 @@
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@ -46,7 +45,7 @@
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.service.Service.STATE;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
@ -68,9 +67,6 @@
import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
@ -94,7 +90,6 @@
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.mortbay.log.Log;
public class TestRMRestart { public class TestRMRestart {
@ -107,7 +102,6 @@ public class TestRMRestart {
public void setup() throws UnknownHostException { public void setup() throws UnknownHostException {
Logger rootLogger = LogManager.getRootLogger(); Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG); rootLogger.setLevel(Level.DEBUG);
ExitUtil.disableSystemExit();
conf = new YarnConfiguration(); conf = new YarnConfiguration();
UserGroupInformation.setConfiguration(conf); UserGroupInformation.setConfiguration(conf);
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true"); conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
@ -1477,6 +1471,29 @@ public void testFinishedAppRemovalAfterRMRestart() throws Exception {
rm2.stop(); rm2.stop();
} }
// This is to test RM does not get hang on shutdown.
@Test (timeout = 10000)
public void testRMShutdown() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore() {
@Override
public synchronized void checkVersion()
throws Exception {
throw new Exception("Invalid version.");
}
};
// start RM
memStore.init(conf);
MockRM rm1 = null;
try {
rm1 = new MockRM(conf, memStore);
rm1.start();
Assert.fail();
} catch (Exception e) {
Assert.assertTrue(e.getMessage().contains("Invalid version."));
}
Assert.assertTrue(rm1.getServiceState() == STATE.STOPPED);
}
public static class TestSecurityMockRM extends MockRM { public static class TestSecurityMockRM extends MockRM {
public TestSecurityMockRM(Configuration conf, RMStateStore store) { public TestSecurityMockRM(Configuration conf, RMStateStore store) {