HDDS-917. Expose NodeManagerMXBean as a MetricsSource. Contributed by Siddharth Wagle.
This commit is contained in:
parent
688b177fc6
commit
091a664977
@ -102,7 +102,6 @@ public class SCMNodeManager implements NodeManager {
|
||||
public SCMNodeManager(OzoneConfiguration conf, String clusterID,
|
||||
StorageContainerManager scmManager, EventPublisher eventPublisher)
|
||||
throws IOException {
|
||||
this.metrics = SCMNodeMetrics.create();
|
||||
this.nodeStateManager = new NodeStateManager(conf, eventPublisher);
|
||||
this.clusterID = clusterID;
|
||||
this.version = VersionInfo.getLatestVersion();
|
||||
@ -110,6 +109,7 @@ public class SCMNodeManager implements NodeManager {
|
||||
this.scmManager = scmManager;
|
||||
LOG.info("Entering startup chill mode.");
|
||||
registerMXBean();
|
||||
this.metrics = SCMNodeMetrics.create(this);
|
||||
}
|
||||
|
||||
private void registerMXBean() {
|
||||
@ -118,7 +118,7 @@ public class SCMNodeManager implements NodeManager {
|
||||
}
|
||||
|
||||
private void unregisterMXBean() {
|
||||
if(this.nmInfoBean != null) {
|
||||
if (this.nmInfoBean != null) {
|
||||
MBeans.unregister(this.nmInfoBean);
|
||||
this.nmInfoBean = null;
|
||||
}
|
||||
|
@ -18,11 +18,24 @@
|
||||
|
||||
package org.apache.hadoop.hdds.scm.node;
|
||||
|
||||
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
|
||||
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED;
|
||||
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING;
|
||||
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
|
||||
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.metrics2.MetricsCollector;
|
||||
import org.apache.hadoop.metrics2.MetricsInfo;
|
||||
import org.apache.hadoop.metrics2.MetricsSource;
|
||||
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||
import org.apache.hadoop.metrics2.lib.Interns;
|
||||
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
||||
|
||||
/**
|
||||
@ -30,7 +43,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
@Metrics(about = "SCM NodeManager Metrics", context = "ozone")
|
||||
public final class SCMNodeMetrics {
|
||||
public final class SCMNodeMetrics implements MetricsSource {
|
||||
|
||||
private static final String SOURCE_NAME =
|
||||
SCMNodeMetrics.class.getSimpleName();
|
||||
@ -40,18 +53,26 @@ public final class SCMNodeMetrics {
|
||||
private @Metric MutableCounterLong numNodeReportProcessed;
|
||||
private @Metric MutableCounterLong numNodeReportProcessingFailed;
|
||||
|
||||
private final MetricsRegistry registry;
|
||||
private final NodeManagerMXBean managerMXBean;
|
||||
private final MetricsInfo recordInfo = Interns.info("SCMNodeManager",
|
||||
"SCM NodeManager metrics");
|
||||
|
||||
/** Private constructor. */
|
||||
private SCMNodeMetrics() { }
|
||||
private SCMNodeMetrics(NodeManagerMXBean managerMXBean) {
|
||||
this.managerMXBean = managerMXBean;
|
||||
this.registry = new MetricsRegistry(recordInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create and returns SCMNodeMetrics instance.
|
||||
*
|
||||
* @return SCMNodeMetrics
|
||||
*/
|
||||
public static SCMNodeMetrics create() {
|
||||
public static SCMNodeMetrics create(NodeManagerMXBean managerMXBean) {
|
||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||
return ms.register(SOURCE_NAME, "SCM NodeManager Metrics",
|
||||
new SCMNodeMetrics());
|
||||
new SCMNodeMetrics(managerMXBean));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -90,4 +111,51 @@ public final class SCMNodeMetrics {
|
||||
numNodeReportProcessingFailed.incr();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get aggregated counter and gauage metrics.
|
||||
*/
|
||||
@Override
|
||||
@SuppressWarnings("SuspiciousMethodCalls")
|
||||
public void getMetrics(MetricsCollector collector, boolean all) {
|
||||
Map<String, Integer> nodeCount = managerMXBean.getNodeCount();
|
||||
Map<String, Long> nodeInfo = managerMXBean.getNodeInfo();
|
||||
|
||||
registry.snapshot(
|
||||
collector.addRecord(registry.info()) // Add annotated ones first
|
||||
.addGauge(Interns.info(
|
||||
"HealthyNodes",
|
||||
"Number of healthy datanodes"),
|
||||
nodeCount.get(HEALTHY.toString()))
|
||||
.addGauge(Interns.info("StaleNodes",
|
||||
"Number of stale datanodes"),
|
||||
nodeCount.get(STALE.toString()))
|
||||
.addGauge(Interns.info("DeadNodes",
|
||||
"Number of dead datanodes"),
|
||||
nodeCount.get(DEAD.toString()))
|
||||
.addGauge(Interns.info("DecommissioningNodes",
|
||||
"Number of decommissioning datanodes"),
|
||||
nodeCount.get(DECOMMISSIONING.toString()))
|
||||
.addGauge(Interns.info("DecommissionedNodes",
|
||||
"Number of decommissioned datanodes"),
|
||||
nodeCount.get(DECOMMISSIONED.toString()))
|
||||
.addGauge(Interns.info("DiskCapacity",
|
||||
"Total disk capacity"),
|
||||
nodeInfo.get("DISKCapacity"))
|
||||
.addGauge(Interns.info("DiskUsed",
|
||||
"Total disk capacity used"),
|
||||
nodeInfo.get("DISKUsed"))
|
||||
.addGauge(Interns.info("DiskRemaining",
|
||||
"Total disk capacity remaining"),
|
||||
nodeInfo.get("DISKRemaining"))
|
||||
.addGauge(Interns.info("SSDCapacity",
|
||||
"Total ssd capacity"),
|
||||
nodeInfo.get("SSDCapacity"))
|
||||
.addGauge(Interns.info("SSDUsed",
|
||||
"Total ssd capacity used"),
|
||||
nodeInfo.get("SSDUsed"))
|
||||
.addGauge(Interns.info("SSDRemaining",
|
||||
"Total disk capacity remaining"),
|
||||
nodeInfo.get("SSDRemaining")),
|
||||
all);
|
||||
}
|
||||
}
|
||||
|
@ -35,6 +35,7 @@ import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||
|
||||
@ -128,6 +129,45 @@ public class TestSCMNodeMetrics {
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify that datanode aggregated state and capacity metrics are reported.
|
||||
*/
|
||||
@Test
|
||||
public void testNodeCountAndInfoMetricsReported() throws Exception {
|
||||
HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
|
||||
StorageReportProto storageReport = TestUtils.createStorageReport(
|
||||
datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null);
|
||||
NodeReportProto nodeReport = NodeReportProto.newBuilder()
|
||||
.addStorageReport(storageReport).build();
|
||||
datanode.getDatanodeStateMachine().getContext().addReport(nodeReport);
|
||||
datanode.getDatanodeStateMachine().triggerHeartbeat();
|
||||
// Give some time so that SCM receives and processes the heartbeat.
|
||||
Thread.sleep(300L);
|
||||
|
||||
assertGauge("HealthyNodes", 1,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("StaleNodes", 0,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("DeadNodes", 0,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("DecommissioningNodes", 0,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("DecommissionedNodes", 0,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("DiskCapacity", 100L,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("DiskUsed", 10L,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("DiskRemaining", 90L,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("SSDCapacity", 0L,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("SSDUsed", 0L,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
assertGauge("SSDRemaining", 0L,
|
||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||
}
|
||||
|
||||
@After
|
||||
public void teardown() {
|
||||
cluster.shutdown();
|
||||
|
Loading…
x
Reference in New Issue
Block a user