YARN-11529 Add metrics for ContainerMonitorImpl. (#5828)
This commit is contained in:
parent
680af87377
commit
325f7e5fdb
@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
|||||||
|
|
||||||
import org.apache.hadoop.classification.VisibleForTesting;
|
import org.apache.hadoop.classification.VisibleForTesting;
|
||||||
import org.apache.hadoop.util.Preconditions;
|
import org.apache.hadoop.util.Preconditions;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
||||||
@ -497,6 +498,7 @@ public class ContainersMonitorImpl extends AbstractService implements
|
|||||||
public void run() {
|
public void run() {
|
||||||
|
|
||||||
while (!stopped && !Thread.currentThread().isInterrupted()) {
|
while (!stopped && !Thread.currentThread().isInterrupted()) {
|
||||||
|
long start = Time.monotonicNow();
|
||||||
// Print the processTrees for debugging.
|
// Print the processTrees for debugging.
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
StringBuilder tmp = new StringBuilder("[ ");
|
StringBuilder tmp = new StringBuilder("[ ");
|
||||||
@ -587,6 +589,9 @@ public class ContainersMonitorImpl extends AbstractService implements
|
|||||||
// Save the aggregated utilization of the containers
|
// Save the aggregated utilization of the containers
|
||||||
setContainersUtilization(trackedContainersUtilization);
|
setContainersUtilization(trackedContainersUtilization);
|
||||||
|
|
||||||
|
long duration = Time.monotonicNow() - start;
|
||||||
|
LOG.debug("Finished monitoring container cost {} ms", duration);
|
||||||
|
|
||||||
// Publish the container utilization metrics to node manager
|
// Publish the container utilization metrics to node manager
|
||||||
// metrics system.
|
// metrics system.
|
||||||
NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics();
|
NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics();
|
||||||
@ -597,6 +602,7 @@ public class ContainersMonitorImpl extends AbstractService implements
|
|||||||
trackedContainersUtilization.getVirtualMemory());
|
trackedContainersUtilization.getVirtualMemory());
|
||||||
nmMetrics.setContainerCpuUtilization(
|
nmMetrics.setContainerCpuUtilization(
|
||||||
trackedContainersUtilization.getCPU());
|
trackedContainersUtilization.getCPU());
|
||||||
|
nmMetrics.addContainerMonitorCostTime(duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -118,6 +118,9 @@ public class NodeManagerMetrics {
|
|||||||
@Metric("Container localization time in milliseconds")
|
@Metric("Container localization time in milliseconds")
|
||||||
MutableRate localizationDurationMillis;
|
MutableRate localizationDurationMillis;
|
||||||
|
|
||||||
|
@Metric("ContainerMonitor time cost in milliseconds")
|
||||||
|
MutableGaugeLong containersMonitorCostTime;
|
||||||
|
|
||||||
// CHECKSTYLE:ON:VisibilityModifier
|
// CHECKSTYLE:ON:VisibilityModifier
|
||||||
|
|
||||||
private JvmMetrics jvmMetrics = null;
|
private JvmMetrics jvmMetrics = null;
|
||||||
@ -481,4 +484,9 @@ public class NodeManagerMetrics {
|
|||||||
public void localizationComplete(long downloadMillis) {
|
public void localizationComplete(long downloadMillis) {
|
||||||
localizationDurationMillis.add(downloadMillis);
|
localizationDurationMillis.add(downloadMillis);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds the time spent in one containers-monitor pass to the
 * {@code containersMonitorCostTime} gauge.
 *
 * <p>The gauge is incremented by {@code duration} rather than overwritten,
 * so the published value grows with every call.
 *
 * @param duration elapsed time of the monitoring pass, in milliseconds
 */
public void addContainerMonitorCostTime(long duration) {
|
||||||
|
containersMonitorCostTime.incr(duration);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -130,9 +130,12 @@ public class TestNodeManagerMetrics {
|
|||||||
|
|
||||||
// Update resource and check available resource again
|
// Update resource and check available resource again
|
||||||
metrics.addResource(total);
|
metrics.addResource(total);
|
||||||
|
metrics.addContainerMonitorCostTime(200L);
|
||||||
|
|
||||||
MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
|
MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
|
||||||
assertGauge("AvailableGB", 12, rb);
|
assertGauge("AvailableGB", 12, rb);
|
||||||
assertGauge("AvailableVCores", 19, rb);
|
assertGauge("AvailableVCores", 19, rb);
|
||||||
|
assertGauge("ContainersMonitorCostTime", 200L, rb);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void checkMetrics(int launched, int completed, int failed,
|
public static void checkMetrics(int launched, int completed, int failed,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user