HDFS-17055 Export HAState as a metric from Namenode for monitoring (#5764)
This commit is contained in:
parent
a85272c33d
commit
03902f5ef0
@ -48,6 +48,7 @@
|
|||||||
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
||||||
import org.apache.hadoop.ipc.StandbyException;
|
import org.apache.hadoop.ipc.StandbyException;
|
||||||
import org.apache.hadoop.ipc.RPC;
|
import org.apache.hadoop.ipc.RPC;
|
||||||
|
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
|
||||||
@ -68,6 +69,7 @@
|
|||||||
* </ol>
|
* </ol>
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
|
@Metrics(context="dfs")
|
||||||
public class BackupNode extends NameNode {
|
public class BackupNode extends NameNode {
|
||||||
private static final String BN_ADDRESS_NAME_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY;
|
private static final String BN_ADDRESS_NAME_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY;
|
||||||
private static final String BN_ADDRESS_DEFAULT = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT;
|
private static final String BN_ADDRESS_DEFAULT = DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT;
|
||||||
|
@ -78,6 +78,8 @@
|
|||||||
import org.apache.hadoop.ipc.RetriableException;
|
import org.apache.hadoop.ipc.RetriableException;
|
||||||
import org.apache.hadoop.ipc.Server;
|
import org.apache.hadoop.ipc.Server;
|
||||||
import org.apache.hadoop.ipc.StandbyException;
|
import org.apache.hadoop.ipc.StandbyException;
|
||||||
|
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||||
|
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.util.MBeans;
|
import org.apache.hadoop.metrics2.util.MBeans;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
@ -252,6 +254,7 @@
|
|||||||
* NameNode state, for example partial blocksMap etc.
|
* NameNode state, for example partial blocksMap etc.
|
||||||
**********************************************************/
|
**********************************************************/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
|
@Metrics(context="dfs")
|
||||||
public class NameNode extends ReconfigurableBase implements
|
public class NameNode extends ReconfigurableBase implements
|
||||||
NameNodeStatusMXBean, TokenVerifier<DelegationTokenIdentifier> {
|
NameNodeStatusMXBean, TokenVerifier<DelegationTokenIdentifier> {
|
||||||
static{
|
static{
|
||||||
@ -1146,6 +1149,7 @@ protected NameNode(Configuration conf, NamenodeRole role)
|
|||||||
DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE,
|
DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE,
|
||||||
DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT);
|
DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE_DEFAULT);
|
||||||
this.started.set(true);
|
this.started.set(true);
|
||||||
|
DefaultMetricsSystem.instance().register(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void stopAtException(Exception e){
|
private void stopAtException(Exception e){
|
||||||
@ -1216,6 +1220,7 @@ public void stop() {
|
|||||||
levelDBAliasMapServer.close();
|
levelDBAliasMapServer.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
started.set(false);
|
||||||
tracer.close();
|
tracer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2051,6 +2056,26 @@ synchronized HAServiceState getServiceState() {
|
|||||||
return state.getServiceState();
|
return state.getServiceState();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emit Namenode HA service state as an integer so that one can monitor NN HA
|
||||||
|
* state based on this metric.
|
||||||
|
*
|
||||||
|
* @return 0 when not fully started
|
||||||
|
* 1 for active or standalone (non-HA) NN
|
||||||
|
* 2 for standby
|
||||||
|
* 3 for observer
|
||||||
|
*
|
||||||
|
* These are the same integer values for the HAServiceState enum.
|
||||||
|
*/
|
||||||
|
@Metric({"NameNodeState", "Namenode HA service state"})
|
||||||
|
public int getNameNodeState() {
|
||||||
|
if (!isStarted() || state == null) {
|
||||||
|
return HAServiceState.INITIALIZING.ordinal();
|
||||||
|
}
|
||||||
|
|
||||||
|
return state.getServiceState().ordinal();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Register NameNodeStatusMXBean
|
* Register NameNodeStatusMXBean
|
||||||
*/
|
*/
|
||||||
|
@ -153,7 +153,7 @@ public void testFinalize() throws Exception {
|
|||||||
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
|
UpgradeUtilities.createEmptyDirs(dataNodeDirs);
|
||||||
|
|
||||||
log("Finalize NN & BP with existing previous dir", numDirs);
|
log("Finalize NN & BP with existing previous dir", numDirs);
|
||||||
String bpid = UpgradeUtilities.getCurrentBlockPoolID(cluster);
|
String bpid = UpgradeUtilities.getCurrentBlockPoolID(null);
|
||||||
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
|
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
|
||||||
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
|
UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
|
||||||
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
|
UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
|
||||||
|
@ -328,7 +328,7 @@ public void testRollback() throws Exception {
|
|||||||
UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
|
UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
|
||||||
|
|
||||||
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs,
|
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs,
|
||||||
storageInfo, UpgradeUtilities.getCurrentBlockPoolID(cluster));
|
storageInfo, UpgradeUtilities.getCurrentBlockPoolID(null));
|
||||||
startNameNodeShouldFail("Cannot rollback to storage version 1 using this version");
|
startNameNodeShouldFail("Cannot rollback to storage version 1 using this version");
|
||||||
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
|
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
|
||||||
} // end numDir loop
|
} // end numDir loop
|
||||||
|
@ -349,7 +349,7 @@ public void testUpgrade() throws Exception {
|
|||||||
UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
|
UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
|
||||||
|
|
||||||
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
|
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
|
||||||
UpgradeUtilities.getCurrentBlockPoolID(cluster));
|
UpgradeUtilities.getCurrentBlockPoolID(null));
|
||||||
|
|
||||||
startNameNodeShouldFail(StartupOption.UPGRADE);
|
startNameNodeShouldFail(StartupOption.UPGRADE);
|
||||||
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
|
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
|
||||||
@ -362,7 +362,7 @@ public void testUpgrade() throws Exception {
|
|||||||
UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
|
UpgradeUtilities.getCurrentFsscTime(null), NodeType.NAME_NODE);
|
||||||
|
|
||||||
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
|
UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs, storageInfo,
|
||||||
UpgradeUtilities.getCurrentBlockPoolID(cluster));
|
UpgradeUtilities.getCurrentBlockPoolID(null));
|
||||||
|
|
||||||
startNameNodeShouldFail(StartupOption.UPGRADE);
|
startNameNodeShouldFail(StartupOption.UPGRADE);
|
||||||
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
|
UpgradeUtilities.createEmptyDirs(nameNodeDirs);
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
|
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
@ -116,6 +117,7 @@ private NameNode makeNameNode(boolean enableMetricsLogging)
|
|||||||
/**
|
/**
|
||||||
* A NameNode that stubs out the NameSystem for testing.
|
* A NameNode that stubs out the NameSystem for testing.
|
||||||
*/
|
*/
|
||||||
|
@Metrics(context="dfs")
|
||||||
private static class TestNameNode extends NameNode {
|
private static class TestNameNode extends NameNode {
|
||||||
@Override
|
@Override
|
||||||
protected void loadNamesystem(Configuration conf) throws IOException {
|
protected void loadNamesystem(Configuration conf) throws IOException {
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode.ha;
|
package org.apache.hadoop.hdfs.server.namenode.ha;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
@ -29,6 +31,7 @@
|
|||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -176,4 +179,56 @@ public void testHAInodeCount() throws Exception {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the getNameNodeState() API added to NameNode.java.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetNameNodeState() throws IOException {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);
|
||||||
|
|
||||||
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(
|
||||||
|
MiniDFSNNTopology.simpleHATopology(3)).numDataNodes(1).build();
|
||||||
|
|
||||||
|
cluster.waitActive();
|
||||||
|
|
||||||
|
NameNode nn0 = cluster.getNameNode(0);
|
||||||
|
NameNode nn1 = cluster.getNameNode(1);
|
||||||
|
NameNode nn2 = cluster.getNameNode(2);
|
||||||
|
|
||||||
|
// All namenodes are in standby by default
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
|
||||||
|
nn0.getNameNodeState());
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
|
||||||
|
nn1.getNameNodeState());
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
|
||||||
|
nn2.getNameNodeState());
|
||||||
|
|
||||||
|
// Transition nn0 to be active
|
||||||
|
cluster.transitionToActive(0);
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.ACTIVE.ordinal(),
|
||||||
|
nn0.getNameNodeState());
|
||||||
|
|
||||||
|
// Transition nn1 to be active
|
||||||
|
cluster.transitionToStandby(0);
|
||||||
|
cluster.transitionToActive(1);
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.STANDBY.ordinal(),
|
||||||
|
nn0.getNameNodeState());
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.ACTIVE.ordinal(),
|
||||||
|
nn1.getNameNodeState());
|
||||||
|
|
||||||
|
// Transition nn2 to observer
|
||||||
|
cluster.transitionToObserver(2);
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.OBSERVER.ordinal(),
|
||||||
|
nn2.getNameNodeState());
|
||||||
|
|
||||||
|
// Shutdown nn2. Now getNameNodeState should return the INITIALIZING state.
|
||||||
|
cluster.shutdownNameNode(2);
|
||||||
|
assertEquals(HAServiceProtocol.HAServiceState.INITIALIZING.ordinal(),
|
||||||
|
nn2.getNameNodeState());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user