HDFS-7257. Add the time of last HA state transition to NN's /jmx page. Contributed by Charles Lamb.

This commit is contained in:
Haohui Mai 2014-10-23 16:52:58 -07:00
parent 828429dec1
commit 670879ef41
5 changed files with 44 additions and 1 deletions

View File

@ -297,6 +297,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7222. Expose DataNode network errors as a metric. (Charles Lamb via wang) HDFS-7222. Expose DataNode network errors as a metric. (Charles Lamb via wang)
HDFS-7257. Add the time of last HA state transition to NN's /jmx page.
(Charles Lamb via wheat9)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -1619,6 +1619,11 @@ public boolean isSecurityEnabled() {
return UserGroupInformation.isSecurityEnabled(); return UserGroupInformation.isSecurityEnabled();
} }
/**
 * Reports the last HA transition time held by the current {@code state}
 * object, for exposure through {@code NameNodeStatusMXBean}.
 *
 * @return whatever {@code state.getLastHATransitionTime()} returns.
 */
@Override // NameNodeStatusMXBean
public long getLastHATransitionTime() {
  final long lastTransitionMillis = state.getLastHATransitionTime();
  return lastTransitionMillis;
}
/** /**
* Shutdown the NN immediately in an ungraceful way. Used when it would be * Shutdown the NN immediately in an ungraceful way. Used when it would be
* unsafe for the NN to continue operating, e.g. during a failed HA state * unsafe for the NN to continue operating, e.g. during a failed HA state

View File

@ -54,4 +54,11 @@ public interface NameNodeStatusMXBean {
* @return true, if security is enabled. * @return true, if security is enabled.
*/ */
public boolean isSecurityEnabled(); public boolean isSecurityEnabled();
/**
 * Gets the time of the most recent HA state transition, expressed in
 * milliseconds since the epoch.
 *
 * @return the most recent HA transition time in milliseconds since the epoch.
 */
public long getLastHATransitionTime();
} }

View File

@ -21,8 +21,8 @@
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException;
import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.util.Time;
/** /**
* Namenode base state to implement state machine pattern. * Namenode base state to implement state machine pattern.
@ -30,6 +30,7 @@
@InterfaceAudience.Private @InterfaceAudience.Private
abstract public class HAState { abstract public class HAState {
protected final HAServiceState state; protected final HAServiceState state;
private long lastHATransitionTime;
/** /**
* Constructor * Constructor
@ -61,11 +62,25 @@ protected final void setStateInternal(final HAContext context, final HAState s)
exitState(context); exitState(context);
context.setState(s); context.setState(s);
s.enterState(context); s.enterState(context);
s.updateLastHATransitionTime();
} finally { } finally {
context.writeUnlock(); context.writeUnlock();
} }
} }
/**
 * Returns the timestamp stored on this state object by
 * {@code updateLastHATransitionTime()}, which {@code setStateInternal}
 * invokes on the state being entered.
 *
 * NOTE(review): the write happens while the context write lock is held, but
 * this read takes no lock; confirm that is acceptable for callers.
 *
 * @return the most recent HA transition time in milliseconds from the epoch.
 */
public long getLastHATransitionTime() {
  return this.lastHATransitionTime;
}
/** Stamps this state with the current value of {@code Time.now()}. */
private void updateLastHATransitionTime() {
  final long transitionTime = Time.now();
  this.lastHATransitionTime = transitionTime;
}
/** /**
* Method to be overridden by subclasses to prepare to enter a state. * Method to be overridden by subclasses to prepare to enter a state.
* This method is called <em>without</em> the context being locked, * This method is called <em>without</em> the context being locked,

View File

@ -30,6 +30,10 @@
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
import org.junit.Test; import org.junit.Test;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import java.lang.management.ManagementFactory;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
@ -62,6 +66,12 @@ public void testHAMetrics() throws Exception {
assertTrue(0 < nn1.getMillisSinceLastLoadedEdits()); assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());
cluster.transitionToActive(0); cluster.transitionToActive(0);
final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
final ObjectName mxbeanName =
new ObjectName("Hadoop:service=NameNode,name=NameNodeStatus");
final Long ltt1 =
(Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
assertTrue("lastHATransitionTime should be > 0", ltt1 > 0);
assertEquals("active", nn0.getHAState()); assertEquals("active", nn0.getHAState());
assertEquals(0, nn0.getMillisSinceLastLoadedEdits()); assertEquals(0, nn0.getMillisSinceLastLoadedEdits());
@ -69,6 +79,9 @@ public void testHAMetrics() throws Exception {
assertTrue(0 < nn1.getMillisSinceLastLoadedEdits()); assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());
cluster.transitionToStandby(0); cluster.transitionToStandby(0);
final Long ltt2 =
(Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
assertTrue("lastHATransitionTime should be > " + ltt1, ltt2 > ltt1);
cluster.transitionToActive(1); cluster.transitionToActive(1);
assertEquals("standby", nn0.getHAState()); assertEquals("standby", nn0.getHAState());