HDFS-17055 Export HAState as a metric from Namenode for monitoring (#5764)
parent a85272c33d
commit 03902f5ef0
@@ -48,6 +48,7 @@
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;

@@ -68,6 +69,7 @@
  * </ol>
  */
 @InterfaceAudience.Private
+@Metrics(context="dfs")
 public class BackupNode extends NameNode {
   private static final String BN_ADDRESS_NAME_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY;
   private static final String BN_ADDRESS_DEFAULT = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT;
@@ -78,6 +78,8 @@
 import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.ipc.StandbyException;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.net.NetUtils;

@@ -252,6 +254,7 @@
  * NameNode state, for example partial blocksMap etc.
  **********************************************************/
 @InterfaceAudience.Private
+@Metrics(context="dfs")
 public class NameNode extends ReconfigurableBase implements
     NameNodeStatusMXBean, TokenVerifier<DelegationTokenIdentifier> {
   static{

@@ -1146,6 +1149,7 @@ protected NameNode(Configuration conf, NamenodeRole role)
         DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE,
         DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT);
     this.started.set(true);
+    DefaultMetricsSystem.instance().register(this);
   }

   private void stopAtException(Exception e){
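Note: the registration above uses the standard Hadoop metrics2 source pattern, in which an @Metrics-annotated class exposes its @Metric getters once the instance is registered with DefaultMetricsSystem. A minimal standalone sketch of that pattern follows; the ExampleStateSource class, the "ExampleState" metric name, and the "ExampleService" prefix are illustrative only and not part of this commit.

// Sketch (not part of this commit) of the metrics2 source pattern the change relies on.
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;

@Metrics(context="dfs")
public class ExampleStateSource {
  private volatile int state = 0;

  // Polled by the metrics system whenever configured sinks snapshot this source.
  @Metric({"ExampleState", "Example integer state gauge"})
  public int getExampleState() {
    return state;
  }

  public void setState(int newState) {
    this.state = newState;
  }

  public static void main(String[] args) {
    // Initialize a metrics system prefix, then register the annotated source;
    // its @Metric getters are exposed to sinks and JMX from that point on.
    DefaultMetricsSystem.initialize("ExampleService");
    ExampleStateSource source = new ExampleStateSource();
    DefaultMetricsSystem.instance().register(source);
    source.setState(1);
    DefaultMetricsSystem.shutdown();
  }
}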
@@ -1216,6 +1220,7 @@ public void stop() {
         levelDBAliasMapServer.close();
       }
     }
+    started.set(false);
     tracer.close();
   }
@@ -2051,6 +2056,26 @@ synchronized HAServiceState getServiceState() {
     return state.getServiceState();
   }

+  /**
+   * Emit Namenode HA service state as an integer so that one can monitor NN HA
+   * state based on this metric.
+   *
+   * @return 0 when not fully started
+   *         1 for active or standalone (non-HA) NN
+   *         2 for standby
+   *         3 for observer
+   *
+   * These are the same integer values for the HAServiceState enum.
+   */
+  @Metric({"NameNodeState", "Namenode HA service state"})
+  public int getNameNodeState() {
+    if (!isStarted() || state == null) {
+      return HAServiceState.INITIALIZING.ordinal();
+    }
+
+    return state.getServiceState().ordinal();
+  }
+
   /**
    * Register NameNodeStatusMXBean
    */
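Since the exported value is defined as the ordinal of HAServiceState, a monitoring consumer can map it back to a readable state name. The helper below is an illustrative sketch only; the NameNodeStateDecoder class and its decode method are made up for this example and are not part of the commit.

// Illustrative helper (not in this commit): translate the NameNodeState metric
// value back into the corresponding HAServiceState constant name.
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;

public class NameNodeStateDecoder {
  public static String decode(int metricValue) {
    HAServiceState[] states = HAServiceState.values();
    if (metricValue < 0 || metricValue >= states.length) {
      return "UNKNOWN(" + metricValue + ")";
    }
    // Ordinals: 0=INITIALIZING, 1=ACTIVE, 2=STANDBY, 3=OBSERVER
    return states[metricValue].name();
  }

  public static void main(String[] args) {
    System.out.println(decode(1)); // prints ACTIVE
  }
}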
@@ -153,7 +153,7 @@ public void testFinalize() throws Exception {
       UpgradeUtilities.createEmptyDirs(dataNodeDirs);

       log("Finalize NN & BP with existing previous dir", numDirs);
-      String bpid = UpgradeUtilities.getCurrentBlockPoolID(cluster);
+      String bpid = UpgradeUtilities.getCurrentBlockPoolID(null);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
       UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");

@@ -328,7 +328,7 @@ public void testRollback() throws Exception {
           UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);

       UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs,
-          storageInfo, UpgradeUtilities.getCurrentBlockPoolID(cluster));
+          storageInfo, UpgradeUtilities.getCurrentBlockPoolID(null));
       startNameNodeShouldFail("Cannot rollback to storage version 1 using this version");
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
     } // end numDir loop

@@ -349,7 +349,7 @@ public void testUpgrade() throws Exception {
           UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);

       UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
-          UpgradeUtilities.getCurrentBlockPoolID(cluster));
+          UpgradeUtilities.getCurrentBlockPoolID(null));

       startNameNodeShouldFail(StartupOption.UPGRADE);
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);

@@ -362,7 +362,7 @@ public void testUpgrade() throws Exception {
           UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);

       UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
-          UpgradeUtilities.getCurrentBlockPoolID(cluster));
+          UpgradeUtilities.getCurrentBlockPoolID(null));

       startNameNodeShouldFail(StartupOption.UPGRADE);
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hdfs.server.namenode;

 import java.util.function.Supplier;
+import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;

@@ -116,6 +117,7 @@ private NameNode makeNameNode(boolean enableMetricsLogging)
   /**
    * A NameNode that stubs out the NameSystem for testing.
    */
+  @Metrics(context="dfs")
   private static class TestNameNode extends NameNode {
     @Override
     protected void loadNamesystem(Configuration conf) throws IOException {
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.ha;

+import java.io.IOException;
+import org.apache.hadoop.ha.HAServiceProtocol;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;

@@ -29,6 +31,7 @@
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.IOUtils;
 import org.junit.Test;

@@ -176,4 +179,56 @@ public void testHAInodeCount() throws Exception {
     }
   }

+  /**
+   * Test the getNameNodeState() API added to NameNode.java.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testGetNameNodeState() throws IOException {
+    Configuration conf = new Configuration();
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(
+        MiniDFSNNTopology.simpleHATopology(3)).numDataNodes(1).build();
+
+    cluster.waitActive();
+
+    NameNode nn0 = cluster.getNameNode(0);
+    NameNode nn1 = cluster.getNameNode(1);
+    NameNode nn2 = cluster.getNameNode(2);
+
+    // All namenodes are in standby by default
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn0.getNameNodeState());
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn1.getNameNodeState());
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn2.getNameNodeState());
+
+    // Transition nn0 to be active
+    cluster.transitionToActive(0);
+    assertEquals(HAServiceProtocol.HAServiceState.ACTIVE.ordinal(),
+        nn0.getNameNodeState());
+
+    // Transition nn1 to be active
+    cluster.transitionToStandby(0);
+    cluster.transitionToActive(1);
+    assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
+        nn0.getNameNodeState());
+    assertEquals(HAServiceProtocol.HAServiceState.ACTIVE.ordinal(),
+        nn1.getNameNodeState());
+
+    // Transition nn2 to observer
+    cluster.transitionToObserver(2);
+    assertEquals(HAServiceProtocol.HAServiceState.OBSERVER.ordinal(),
+        nn2.getNameNodeState());
+
+    // Shutdown nn2. Now getNameNodeState should return the INITIALIZING state.
+    cluster.shutdownNameNode(2);
+    assertEquals(HAServiceProtocol.HAServiceState.INITIALIZING.ordinal(),
+        nn2.getNameNodeState());
+  }
 }
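As a rough sketch of how the new gauge might be consumed operationally, the value can also be read over JMX once the NameNode has registered itself as a metrics source. Everything below is an assumption-laden example and not part of this commit: the JMX connector URL and port are hypothetical, and the MBean object name assumes the default source name ("NameNode") under the Hadoop domain; verify both against your own deployment.

// Hedged example (not part of this commit): read the NameNodeState gauge via JMX.
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

public class NameNodeStateProbe {
  public static void main(String[] args) throws Exception {
    // Assumes the NameNode JVM exposes a remote JMX connector on this (hypothetical) port.
    JMXServiceURL url = new JMXServiceURL(
        "service:jmx:rmi:///jndi/rmi://localhost:8004/jmxrmi");
    try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
      MBeanServerConnection mbsc = connector.getMBeanServerConnection();
      // Assumed object name for the NameNode metrics source registered in this commit.
      ObjectName source = new ObjectName("Hadoop:service=NameNode,name=NameNode");
      Object value = mbsc.getAttribute(source, "NameNodeState");
      System.out.println("NameNodeState = " + value);
    }
  }
}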