YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager gets connection issue. Contributed by Xuan Gong

This commit is contained in:
Jian He 2015-09-02 17:45:23 -07:00
parent d31a41c359
commit 09c64ba1ba
9 changed files with 43 additions and 3 deletions

View File

@ -1141,4 +1141,8 @@ public String toString() {
((appData == null) ? "null" : StringUtils.byteToHexString(appData)) + ((appData == null) ? "null" : StringUtils.byteToHexString(appData)) +
" cb=" + appClient; " cb=" + appClient;
} }
/**
 * Returns the name of this elector's current {@code zkConnectionState}.
 *
 * @return the result of {@code zkConnectionState.name()}
 */
public String getHAZookeeperConnectionState() {
  return zkConnectionState.name();
}
} }

View File

@ -871,6 +871,9 @@ Release 2.7.2 - UNRELEASED
YARN-3893. Both RM in active state when Admin#transitionToActive failure YARN-3893. Both RM in active state when Admin#transitionToActive failure
from refeshAll() (Bibin A Chundatt via rohithsharmaks) from refeshAll() (Bibin A Chundatt via rohithsharmaks)
YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager
gets connection issue. (Xuan Gong via jianhe)
Release 2.7.1 - 2015-07-06 Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -782,4 +782,13 @@ public RefreshClusterMaxPriorityResponse refreshClusterMaxPriority(
throw logAndWrapException(e, user.getShortUserName(), argName, msg); throw logAndWrapException(e, user.getShortUserName(), argName, msg);
} }
} }
/**
 * Describes the ZooKeeper connection state reported by the embedded elector,
 * or explains why no such state is available.
 *
 * @return a fixed explanatory message when HA is disabled, when auto failover
 *         is disabled, or when no embedded elector exists; otherwise the value
 *         returned by the embedded elector's
 *         {@code getHAZookeeperConnectionState()}
 */
public String getHAZookeeperConnectionState() {
  if (!rmContext.isHAEnabled()) {
    return "ResourceManager HA is not enabled.";
  }
  if (!autoFailoverEnabled) {
    return "Auto Failover is not enabled.";
  }
  // NOTE(review): the elector is presumably only created when automatic
  // failover runs in embedded mode -- confirm against the service init code.
  // Without this guard, a missing elector surfaces as a NullPointerException
  // in every caller of this method.
  if (embeddedElector == null) {
    return "Embedded leader elector is not available.";
  }
  return embeddedElector.getHAZookeeperConnectionState();
}
} }

View File

@ -205,4 +205,8 @@ public void resetLeaderElection() {
elector.quitElection(false); elector.quitElection(false);
elector.joinElection(localActiveNodeInfo); elector.joinElection(localActiveNodeInfo);
} }
/**
 * Exposes the ZooKeeper connection state of the wrapped {@code elector}.
 *
 * @return whatever {@code elector.getHAZookeeperConnectionState()} returns
 */
public String getHAZookeeperConnectionState() {
  final String connectionState = elector.getHAZookeeperConnectionState();
  return connectionState;
}
} }

View File

@ -44,6 +44,8 @@ protected void render(Block html) {
_("Cluster ID:", cinfo.getClusterId()). _("Cluster ID:", cinfo.getClusterId()).
_("ResourceManager state:", cinfo.getState()). _("ResourceManager state:", cinfo.getState()).
_("ResourceManager HA state:", cinfo.getHAState()). _("ResourceManager HA state:", cinfo.getHAState()).
_("ResourceManager HA zookeeper connection state:",
cinfo.getHAZookeeperConnectionState()).
_("ResourceManager RMStateStore:", cinfo.getRMStateStore()). _("ResourceManager RMStateStore:", cinfo.getRMStateStore()).
_("ResourceManager started on:", Times.format(cinfo.getStartedOn())). _("ResourceManager started on:", Times.format(cinfo.getStartedOn())).
_("ResourceManager version:", cinfo.getRMBuildVersion() + _("ResourceManager version:", cinfo.getRMBuildVersion() +

View File

@ -117,4 +117,9 @@ private String buildRedirectPath() {
} }
return path; return path;
} }
/**
 * Looks up the HA ZooKeeper connection state through the ResourceManager's
 * admin service.
 *
 * @return the string returned by the admin service's
 *         {@code getHAZookeeperConnectionState()}
 */
public String getHAZookeeperConnectionState() {
  final String connectionState =
      rm.getRMContext().getRMAdminService().getHAZookeeperConnectionState();
  return connectionState;
}
} }

View File

@ -58,6 +58,7 @@ public class RMWebAppFilter extends GuiceContainer {
private String path; private String path;
private static final int BASIC_SLEEP_TIME = 5; private static final int BASIC_SLEEP_TIME = 5;
private static final int MAX_SLEEP_TIME = 5 * 60; private static final int MAX_SLEEP_TIME = 5 * 60;
private static final Random randnum = new Random();
@Inject @Inject
public RMWebAppFilter(Injector injector, Configuration conf) { public RMWebAppFilter(Injector injector, Configuration conf) {
@ -126,6 +127,8 @@ && shouldRedirect(rmWebApp, uri)) {
String redirectMsg = String redirectMsg =
doRetry ? "Can not find any active RM. Will retry in next " + next doRetry ? "Can not find any active RM. Will retry in next " + next
+ " seconds." : "There is no active RM right now."; + " seconds." : "There is no active RM right now.";
redirectMsg += "\nHA Zookeeper Connection State: "
+ rmWebApp.getHAZookeeperConnectionState();
PrintWriter out = response.getWriter(); PrintWriter out = response.getWriter();
out.println(redirectMsg); out.println(redirectMsg);
if (doRetry) { if (doRetry) {
@ -172,6 +175,6 @@ private String appendOrReplaceParamter(String uri, String newQuery) {
private static int calculateExponentialTime(int retries) { private static int calculateExponentialTime(int retries) {
long baseTime = BASIC_SLEEP_TIME * (1L << retries); long baseTime = BASIC_SLEEP_TIME * (1L << retries);
return (int) (baseTime * ((new Random()).nextDouble() + 0.5)); return (int) (baseTime * (randnum.nextDouble() + 0.5));
} }
} }

View File

@ -43,6 +43,7 @@ public class ClusterInfo {
protected String hadoopVersion; protected String hadoopVersion;
protected String hadoopBuildVersion; protected String hadoopBuildVersion;
protected String hadoopVersionBuiltOn; protected String hadoopVersionBuiltOn;
protected String haZooKeeperConnectionState;
public ClusterInfo() { public ClusterInfo() {
} // JAXB needs this } // JAXB needs this
@ -62,6 +63,8 @@ public ClusterInfo(ResourceManager rm) {
this.hadoopVersion = VersionInfo.getVersion(); this.hadoopVersion = VersionInfo.getVersion();
this.hadoopBuildVersion = VersionInfo.getBuildVersion(); this.hadoopBuildVersion = VersionInfo.getBuildVersion();
this.hadoopVersionBuiltOn = VersionInfo.getDate(); this.hadoopVersionBuiltOn = VersionInfo.getDate();
this.haZooKeeperConnectionState =
rm.getRMContext().getRMAdminService().getHAZookeeperConnectionState();
} }
public String getState() { public String getState() {
@ -108,4 +111,7 @@ public long getStartedOn() {
return this.startedOn; return this.startedOn;
} }
/**
 * Returns the HA ZooKeeper connection state held by this object.
 *
 * <p>The {@code ClusterInfo(ResourceManager)} constructor fills the field from
 * the admin service; the no-argument constructor does not populate it.
 *
 * @return the stored {@code haZooKeeperConnectionState} value
 */
public String getHAZookeeperConnectionState() {
  return haZooKeeperConnectionState;
}
} }

View File

@ -285,6 +285,8 @@ public void verifyClusterInfoXML(String xml) throws JSONException, Exception {
WebServicesTestUtils.getXmlLong(element, "startedOn"), WebServicesTestUtils.getXmlLong(element, "startedOn"),
WebServicesTestUtils.getXmlString(element, "state"), WebServicesTestUtils.getXmlString(element, "state"),
WebServicesTestUtils.getXmlString(element, "haState"), WebServicesTestUtils.getXmlString(element, "haState"),
WebServicesTestUtils.getXmlString(
element, "haZooKeeperConnectionState"),
WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"), WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"),
WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"), WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"),
WebServicesTestUtils.getXmlString(element, "hadoopVersion"), WebServicesTestUtils.getXmlString(element, "hadoopVersion"),
@ -300,9 +302,10 @@ public void verifyClusterInfo(JSONObject json) throws JSONException,
Exception { Exception {
assertEquals("incorrect number of elements", 1, json.length()); assertEquals("incorrect number of elements", 1, json.length());
JSONObject info = json.getJSONObject("clusterInfo"); JSONObject info = json.getJSONObject("clusterInfo");
assertEquals("incorrect number of elements", 11, info.length()); assertEquals("incorrect number of elements", 12, info.length());
verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"), verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"),
info.getString("state"), info.getString("haState"), info.getString("state"), info.getString("haState"),
info.getString("haZooKeeperConnectionState"),
info.getString("hadoopVersionBuiltOn"), info.getString("hadoopVersionBuiltOn"),
info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"), info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"),
info.getString("resourceManagerVersionBuiltOn"), info.getString("resourceManagerVersionBuiltOn"),
@ -312,7 +315,8 @@ public void verifyClusterInfo(JSONObject json) throws JSONException,
} }
public void verifyClusterGeneric(long clusterid, long startedon, public void verifyClusterGeneric(long clusterid, long startedon,
String state, String haState, String hadoopVersionBuiltOn, String state, String haState, String haZooKeeperConnectionState,
String hadoopVersionBuiltOn,
String hadoopBuildVersion, String hadoopVersion, String hadoopBuildVersion, String hadoopVersion,
String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion, String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion,
String resourceManagerVersion) { String resourceManagerVersion) {