From 670879ef414e704f48491805d9af1b0ac5bb4329 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Thu, 23 Oct 2014 16:52:58 -0700 Subject: [PATCH] HDFS-7257. Add the time of last HA state transition to NN's /jmx page. Contributed by Charles Lamb. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop/hdfs/server/namenode/NameNode.java | 5 +++++ .../server/namenode/NameNodeStatusMXBean.java | 7 +++++++ .../hadoop/hdfs/server/namenode/ha/HAState.java | 17 ++++++++++++++++- .../hdfs/server/namenode/ha/TestHAMetrics.java | 13 +++++++++++++ 5 files changed, 44 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c7dce00ab3..d3da6d8f14 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -297,6 +297,9 @@ Release 2.7.0 - UNRELEASED HDFS-7222. Expose DataNode network errors as a metric. (Charles Lamb via wang) + HDFS-7257. Add the time of last HA state transition to NN's /jmx page. + (Charles Lamb via wheat9) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 4e61f05355..a71d158b11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -1619,6 +1619,11 @@ public boolean isSecurityEnabled() { return UserGroupInformation.isSecurityEnabled(); } + @Override // NameNodeStatusMXBean + public long getLastHATransitionTime() { + return state.getLastHATransitionTime(); + } + /** * Shutdown the NN immediately in an ungraceful way. Used when it would be * unsafe for the NN to continue operating, e.g. during a failed HA state diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java index f52407fbd5..e3f712afaf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeStatusMXBean.java @@ -54,4 +54,11 @@ public interface NameNodeStatusMXBean { * @return true, if security is enabled. */ public boolean isSecurityEnabled(); + + /** + * Gets the most recent HA transition time in milliseconds from the epoch. + * + * @return the most recent HA transition time in milliseconds from the epoch. + */ + public long getLastHATransitionTime(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index 34a5da272e..f30071018f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -21,8 +21,8 @@ import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; -import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; import org.apache.hadoop.ipc.StandbyException; +import org.apache.hadoop.util.Time; /** * Namenode base state to implement state machine pattern. @@ -30,6 +30,7 @@ @InterfaceAudience.Private abstract public class HAState { protected final HAServiceState state; + private long lastHATransitionTime; /** * Constructor @@ -61,11 +62,25 @@ protected final void setStateInternal(final HAContext context, final HAState s) exitState(context); context.setState(s); s.enterState(context); + s.updateLastHATransitionTime(); } finally { context.writeUnlock(); } } + /** + * Gets the most recent HA transition time in milliseconds from the epoch. + * + * @return the most recent HA transition time in milliseconds from the epoch. + */ + public long getLastHATransitionTime() { + return lastHATransitionTime; + } + + private void updateLastHATransitionTime() { + lastHATransitionTime = Time.now(); + } + /** * Method to be overridden by subclasses to prepare to enter a state. * This method is called without the context being locked, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java index 1cd76f48fc..6f9fc6e0ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java @@ -30,6 +30,10 @@ import org.apache.hadoop.io.IOUtils; import org.junit.Test; +import javax.management.MBeanServer; +import javax.management.ObjectName; +import java.lang.management.ManagementFactory; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -62,6 +66,12 @@ public void testHAMetrics() throws Exception { assertTrue(0 < nn1.getMillisSinceLastLoadedEdits()); cluster.transitionToActive(0); + final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + final ObjectName mxbeanName = + new ObjectName("Hadoop:service=NameNode,name=NameNodeStatus"); + final Long ltt1 = + (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime"); + assertTrue("lastHATransitionTime should be > 0", ltt1 > 0); assertEquals("active", nn0.getHAState()); assertEquals(0, nn0.getMillisSinceLastLoadedEdits()); @@ -69,6 +79,9 @@ public void testHAMetrics() throws Exception { assertTrue(0 < nn1.getMillisSinceLastLoadedEdits()); cluster.transitionToStandby(0); + final Long ltt2 = + (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime"); + assertTrue("lastHATransitionTime should be > " + ltt1, ltt2 > ltt1); cluster.transitionToActive(1); assertEquals("standby", nn0.getHAState());