YARN-11529 Add metrics for ContainersMonitorImpl. (#5828)

This commit is contained in:
Xianming Lei 2023-07-13 09:50:41 +08:00 committed by GitHub
parent 680af87377
commit 325f7e5fdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 0 deletions

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
@ -497,6 +498,7 @@ public class ContainersMonitorImpl extends AbstractService implements
public void run() { public void run() {
while (!stopped && !Thread.currentThread().isInterrupted()) { while (!stopped && !Thread.currentThread().isInterrupted()) {
long start = Time.monotonicNow();
// Print the processTrees for debugging. // Print the processTrees for debugging.
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
StringBuilder tmp = new StringBuilder("[ "); StringBuilder tmp = new StringBuilder("[ ");
@ -587,6 +589,9 @@ public class ContainersMonitorImpl extends AbstractService implements
// Save the aggregated utilization of the containers // Save the aggregated utilization of the containers
setContainersUtilization(trackedContainersUtilization); setContainersUtilization(trackedContainersUtilization);
long duration = Time.monotonicNow() - start;
LOG.debug("Finished monitoring container cost {} ms", duration);
// Publish the container utilization metrics to node manager // Publish the container utilization metrics to node manager
// metrics system. // metrics system.
NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics(); NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics();
@ -597,6 +602,7 @@ public class ContainersMonitorImpl extends AbstractService implements
trackedContainersUtilization.getVirtualMemory()); trackedContainersUtilization.getVirtualMemory());
nmMetrics.setContainerCpuUtilization( nmMetrics.setContainerCpuUtilization(
trackedContainersUtilization.getCPU()); trackedContainersUtilization.getCPU());
nmMetrics.addContainerMonitorCostTime(duration);
} }
try { try {

View File

@ -118,6 +118,9 @@ public class NodeManagerMetrics {
@Metric("Container localization time in milliseconds") @Metric("Container localization time in milliseconds")
MutableRate localizationDurationMillis; MutableRate localizationDurationMillis;
@Metric("ContainerMonitor time cost in milliseconds")
MutableGaugeLong containersMonitorCostTime;
// CHECKSTYLE:ON:VisibilityModifier // CHECKSTYLE:ON:VisibilityModifier
private JvmMetrics jvmMetrics = null; private JvmMetrics jvmMetrics = null;
@ -481,4 +484,9 @@ public class NodeManagerMetrics {
public void localizationComplete(long downloadMillis) { public void localizationComplete(long downloadMillis) {
localizationDurationMillis.add(downloadMillis); localizationDurationMillis.add(downloadMillis);
} }
/**
 * Records the time spent by one container-monitoring pass.
 *
 * <p>The value is added on top of the current gauge value via
 * {@code incr}; it does not replace it, so the gauge holds the running
 * sum of every duration reported so far.
 *
 * @param duration elapsed time of the monitoring pass, in milliseconds
 */
public void addContainerMonitorCostTime(long duration) {
  this.containersMonitorCostTime.incr(duration);
}
} }

View File

@ -130,9 +130,12 @@ public class TestNodeManagerMetrics {
// Update resource and check available resource again // Update resource and check available resource again
metrics.addResource(total); metrics.addResource(total);
metrics.addContainerMonitorCostTime(200L);
MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics"); MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
assertGauge("AvailableGB", 12, rb); assertGauge("AvailableGB", 12, rb);
assertGauge("AvailableVCores", 19, rb); assertGauge("AvailableVCores", 19, rb);
assertGauge("ContainersMonitorCostTime", 200L, rb);
} }
public static void checkMetrics(int launched, int completed, int failed, public static void checkMetrics(int launched, int completed, int failed,