HDDS-917. Expose NodeManagerMXBean as a MetricsSource. Contributed by Siddharth Wagle.
This commit is contained in:
parent
688b177fc6
commit
091a664977
@ -102,7 +102,6 @@ public class SCMNodeManager implements NodeManager {
|
|||||||
public SCMNodeManager(OzoneConfiguration conf, String clusterID,
|
public SCMNodeManager(OzoneConfiguration conf, String clusterID,
|
||||||
StorageContainerManager scmManager, EventPublisher eventPublisher)
|
StorageContainerManager scmManager, EventPublisher eventPublisher)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
this.metrics = SCMNodeMetrics.create();
|
|
||||||
this.nodeStateManager = new NodeStateManager(conf, eventPublisher);
|
this.nodeStateManager = new NodeStateManager(conf, eventPublisher);
|
||||||
this.clusterID = clusterID;
|
this.clusterID = clusterID;
|
||||||
this.version = VersionInfo.getLatestVersion();
|
this.version = VersionInfo.getLatestVersion();
|
||||||
@ -110,6 +109,7 @@ public SCMNodeManager(OzoneConfiguration conf, String clusterID,
|
|||||||
this.scmManager = scmManager;
|
this.scmManager = scmManager;
|
||||||
LOG.info("Entering startup chill mode.");
|
LOG.info("Entering startup chill mode.");
|
||||||
registerMXBean();
|
registerMXBean();
|
||||||
|
this.metrics = SCMNodeMetrics.create(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void registerMXBean() {
|
private void registerMXBean() {
|
||||||
|
@ -18,11 +18,24 @@
|
|||||||
|
|
||||||
package org.apache.hadoop.hdds.scm.node;
|
package org.apache.hadoop.hdds.scm.node;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
|
||||||
|
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED;
|
||||||
|
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING;
|
||||||
|
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY;
|
||||||
|
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.metrics2.MetricsCollector;
|
||||||
|
import org.apache.hadoop.metrics2.MetricsInfo;
|
||||||
|
import org.apache.hadoop.metrics2.MetricsSource;
|
||||||
import org.apache.hadoop.metrics2.MetricsSystem;
|
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.annotation.Metric;
|
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||||
import org.apache.hadoop.metrics2.annotation.Metrics;
|
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
|
import org.apache.hadoop.metrics2.lib.Interns;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -30,7 +43,7 @@
|
|||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
@Metrics(about = "SCM NodeManager Metrics", context = "ozone")
|
@Metrics(about = "SCM NodeManager Metrics", context = "ozone")
|
||||||
public final class SCMNodeMetrics {
|
public final class SCMNodeMetrics implements MetricsSource {
|
||||||
|
|
||||||
private static final String SOURCE_NAME =
|
private static final String SOURCE_NAME =
|
||||||
SCMNodeMetrics.class.getSimpleName();
|
SCMNodeMetrics.class.getSimpleName();
|
||||||
@ -40,18 +53,26 @@ public final class SCMNodeMetrics {
|
|||||||
private @Metric MutableCounterLong numNodeReportProcessed;
|
private @Metric MutableCounterLong numNodeReportProcessed;
|
||||||
private @Metric MutableCounterLong numNodeReportProcessingFailed;
|
private @Metric MutableCounterLong numNodeReportProcessingFailed;
|
||||||
|
|
||||||
|
private final MetricsRegistry registry;
|
||||||
|
private final NodeManagerMXBean managerMXBean;
|
||||||
|
private final MetricsInfo recordInfo = Interns.info("SCMNodeManager",
|
||||||
|
"SCM NodeManager metrics");
|
||||||
|
|
||||||
/** Private constructor. */
|
/** Private constructor. */
|
||||||
private SCMNodeMetrics() { }
|
private SCMNodeMetrics(NodeManagerMXBean managerMXBean) {
|
||||||
|
this.managerMXBean = managerMXBean;
|
||||||
|
this.registry = new MetricsRegistry(recordInfo);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates and returns SCMNodeMetrics instance.
|
* Creates and returns SCMNodeMetrics instance.
|
||||||
*
|
*
|
||||||
* @return SCMNodeMetrics
|
* @return SCMNodeMetrics
|
||||||
*/
|
*/
|
||||||
public static SCMNodeMetrics create() {
|
public static SCMNodeMetrics create(NodeManagerMXBean managerMXBean) {
|
||||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||||
return ms.register(SOURCE_NAME, "SCM NodeManager Metrics",
|
return ms.register(SOURCE_NAME, "SCM NodeManager Metrics",
|
||||||
new SCMNodeMetrics());
|
new SCMNodeMetrics(managerMXBean));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -90,4 +111,51 @@ void incNumNodeReportProcessingFailed() {
|
|||||||
numNodeReportProcessingFailed.incr();
|
numNodeReportProcessingFailed.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get aggregated counter and gauage metrics.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
@SuppressWarnings("SuspiciousMethodCalls")
|
||||||
|
public void getMetrics(MetricsCollector collector, boolean all) {
|
||||||
|
Map<String, Integer> nodeCount = managerMXBean.getNodeCount();
|
||||||
|
Map<String, Long> nodeInfo = managerMXBean.getNodeInfo();
|
||||||
|
|
||||||
|
registry.snapshot(
|
||||||
|
collector.addRecord(registry.info()) // Add annotated ones first
|
||||||
|
.addGauge(Interns.info(
|
||||||
|
"HealthyNodes",
|
||||||
|
"Number of healthy datanodes"),
|
||||||
|
nodeCount.get(HEALTHY.toString()))
|
||||||
|
.addGauge(Interns.info("StaleNodes",
|
||||||
|
"Number of stale datanodes"),
|
||||||
|
nodeCount.get(STALE.toString()))
|
||||||
|
.addGauge(Interns.info("DeadNodes",
|
||||||
|
"Number of dead datanodes"),
|
||||||
|
nodeCount.get(DEAD.toString()))
|
||||||
|
.addGauge(Interns.info("DecommissioningNodes",
|
||||||
|
"Number of decommissioning datanodes"),
|
||||||
|
nodeCount.get(DECOMMISSIONING.toString()))
|
||||||
|
.addGauge(Interns.info("DecommissionedNodes",
|
||||||
|
"Number of decommissioned datanodes"),
|
||||||
|
nodeCount.get(DECOMMISSIONED.toString()))
|
||||||
|
.addGauge(Interns.info("DiskCapacity",
|
||||||
|
"Total disk capacity"),
|
||||||
|
nodeInfo.get("DISKCapacity"))
|
||||||
|
.addGauge(Interns.info("DiskUsed",
|
||||||
|
"Total disk capacity used"),
|
||||||
|
nodeInfo.get("DISKUsed"))
|
||||||
|
.addGauge(Interns.info("DiskRemaining",
|
||||||
|
"Total disk capacity remaining"),
|
||||||
|
nodeInfo.get("DISKRemaining"))
|
||||||
|
.addGauge(Interns.info("SSDCapacity",
|
||||||
|
"Total ssd capacity"),
|
||||||
|
nodeInfo.get("SSDCapacity"))
|
||||||
|
.addGauge(Interns.info("SSDUsed",
|
||||||
|
"Total ssd capacity used"),
|
||||||
|
nodeInfo.get("SSDUsed"))
|
||||||
|
.addGauge(Interns.info("SSDRemaining",
|
||||||
|
"Total disk capacity remaining"),
|
||||||
|
nodeInfo.get("SSDRemaining")),
|
||||||
|
all);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,7 @@
|
|||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
|
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
|
||||||
|
import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
|
||||||
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
|
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
|
||||||
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||||
|
|
||||||
@ -128,6 +129,45 @@ public void testNodeReportProcessingFailure() {
|
|||||||
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verify that datanode aggregated state and capacity metrics are reported.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testNodeCountAndInfoMetricsReported() throws Exception {
|
||||||
|
HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0);
|
||||||
|
StorageReportProto storageReport = TestUtils.createStorageReport(
|
||||||
|
datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null);
|
||||||
|
NodeReportProto nodeReport = NodeReportProto.newBuilder()
|
||||||
|
.addStorageReport(storageReport).build();
|
||||||
|
datanode.getDatanodeStateMachine().getContext().addReport(nodeReport);
|
||||||
|
datanode.getDatanodeStateMachine().triggerHeartbeat();
|
||||||
|
// Give some time so that SCM receives and processes the heartbeat.
|
||||||
|
Thread.sleep(300L);
|
||||||
|
|
||||||
|
assertGauge("HealthyNodes", 1,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("StaleNodes", 0,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("DeadNodes", 0,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("DecommissioningNodes", 0,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("DecommissionedNodes", 0,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("DiskCapacity", 100L,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("DiskUsed", 10L,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("DiskRemaining", 90L,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("SSDCapacity", 0L,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("SSDUsed", 0L,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
assertGauge("SSDRemaining", 0L,
|
||||||
|
getMetrics(SCMNodeMetrics.class.getSimpleName()));
|
||||||
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void teardown() {
|
public void teardown() {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
|
Loading…
Reference in New Issue
Block a user