HDFS-15821. Add metrics for in-service datanodes (#2690). Contributed by Zehao Chen.
(cherry picked from commit 07a4220cd2)
This commit is contained in:
parent
4468378e4b
commit
1768c0096f
@ -709,6 +709,11 @@ public int getNumEnteringMaintenanceDataNodes() {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getNumInServiceLiveDataNodes() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getVolumeFailuresTotal() {
|
public int getVolumeFailuresTotal() {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -5504,6 +5504,19 @@ public int getNumDecomDeadDataNodes() {
|
|||||||
return deadDecommissioned;
|
return deadDecommissioned;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override // FSNamesystemMBean
|
||||||
|
@Metric({"NumInServiceLiveDataNodes",
|
||||||
|
"Number of live datanodes which are currently in service"})
|
||||||
|
public int getNumInServiceLiveDataNodes() {
|
||||||
|
final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
|
||||||
|
getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
|
||||||
|
int liveInService = live.size();
|
||||||
|
for (DatanodeDescriptor node : live) {
|
||||||
|
liveInService -= node.isInMaintenance() ? 1 : 0;
|
||||||
|
}
|
||||||
|
return liveInService;
|
||||||
|
}
|
||||||
|
|
||||||
@Override // FSNamesystemMBean
|
@Override // FSNamesystemMBean
|
||||||
@Metric({"VolumeFailuresTotal",
|
@Metric({"VolumeFailuresTotal",
|
||||||
"Total number of volume failures across all Datanodes"})
|
"Total number of volume failures across all Datanodes"})
|
||||||
|
@ -150,6 +150,12 @@ public interface FSNamesystemMBean {
|
|||||||
*/
|
*/
|
||||||
public int getNumDecomDeadDataNodes();
|
public int getNumDecomDeadDataNodes();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Number of in-service data nodes, where NumInServiceLiveDataNodes =
|
||||||
|
* NumLiveDataNodes - NumDecomLiveDataNodes - NumInMaintenanceLiveDataNodes
|
||||||
|
*/
|
||||||
|
int getNumInServiceLiveDataNodes();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of failed data volumes across all live data nodes.
|
* Number of failed data volumes across all live data nodes.
|
||||||
* @return number of failed data volumes across all live data nodes
|
* @return number of failed data volumes across all live data nodes
|
||||||
|
@ -435,6 +435,103 @@ public Boolean get() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 120000)
|
||||||
|
public void testInServiceNodes() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
|
||||||
|
30);
|
||||||
|
conf.setClass(DFSConfigKeys.DFS_NAMENODE_HOSTS_PROVIDER_CLASSNAME_KEY,
|
||||||
|
CombinedHostFileManager.class, HostConfigManager.class);
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
HostsFileWriter hostsFileWriter = new HostsFileWriter();
|
||||||
|
hostsFileWriter.initialize(conf, "temp/TestInServiceNodes");
|
||||||
|
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
|
||||||
|
final FSNamesystem fsn = cluster.getNameNode().namesystem;
|
||||||
|
final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
|
||||||
|
final ObjectName mxbeanName = new ObjectName(
|
||||||
|
"Hadoop:service=NameNode,name=FSNamesystem");
|
||||||
|
|
||||||
|
List<String> hosts = new ArrayList<>();
|
||||||
|
for (DataNode dn : cluster.getDataNodes()) {
|
||||||
|
hosts.add(dn.getDisplayName());
|
||||||
|
}
|
||||||
|
hostsFileWriter.initIncludeHosts(hosts.toArray(
|
||||||
|
new String[hosts.size()]));
|
||||||
|
fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
|
||||||
|
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
try {
|
||||||
|
int numLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
|
||||||
|
"NumLiveDataNodes");
|
||||||
|
return numLiveDataNodes == 3;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 1000, 60000);
|
||||||
|
|
||||||
|
// Verify nodes
|
||||||
|
int numDecomLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
|
||||||
|
"NumDecomLiveDataNodes");
|
||||||
|
int numInMaintenanceLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
|
||||||
|
"NumInMaintenanceLiveDataNodes");
|
||||||
|
int numInServiceLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
|
||||||
|
"NumInServiceLiveDataNodes");
|
||||||
|
assertEquals(0, numDecomLiveDataNodes);
|
||||||
|
assertEquals(0, numInMaintenanceLiveDataNodes);
|
||||||
|
assertEquals(3, numInServiceLiveDataNodes);
|
||||||
|
|
||||||
|
// Add 2 nodes to out-of-service list
|
||||||
|
ArrayList<String> decomNodes = new ArrayList<>();
|
||||||
|
decomNodes.add(cluster.getDataNodes().get(0).getDisplayName());
|
||||||
|
|
||||||
|
Map<String, Long> maintenanceNodes = new HashMap<>();
|
||||||
|
final int expirationInMs = 30 * 1000;
|
||||||
|
maintenanceNodes.put(cluster.getDataNodes().get(1).getDisplayName(),
|
||||||
|
Time.now() + expirationInMs);
|
||||||
|
|
||||||
|
hostsFileWriter.initOutOfServiceHosts(decomNodes, maintenanceNodes);
|
||||||
|
fsn.getBlockManager().getDatanodeManager().refreshNodes(conf);
|
||||||
|
|
||||||
|
// Wait for the DatanodeAdminManager to complete check
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
try {
|
||||||
|
int numLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
|
||||||
|
"NumLiveDataNodes");
|
||||||
|
int numDecomLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
|
||||||
|
"NumDecomLiveDataNodes");
|
||||||
|
int numInMaintenanceLiveDataNodes = (int) mbs.getAttribute(
|
||||||
|
mxbeanName, "NumInMaintenanceLiveDataNodes");
|
||||||
|
return numLiveDataNodes == 3 &&
|
||||||
|
numDecomLiveDataNodes == 1 &&
|
||||||
|
numInMaintenanceLiveDataNodes == 1;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 1000, 60000);
|
||||||
|
|
||||||
|
// Verify nodes
|
||||||
|
numInServiceLiveDataNodes = (int) mbs.getAttribute(mxbeanName,
|
||||||
|
"NumInServiceLiveDataNodes");
|
||||||
|
assertEquals(1, numInServiceLiveDataNodes);
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
hostsFileWriter.cleanup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test (timeout = 120000)
|
@Test (timeout = 120000)
|
||||||
public void testMaintenanceNodes() throws Exception {
|
public void testMaintenanceNodes() throws Exception {
|
||||||
LOG.info("Starting testMaintenanceNodes");
|
LOG.info("Starting testMaintenanceNodes");
|
||||||
|
Loading…
Reference in New Issue
Block a user