YARN-11529 Add metrics for ContainerMonitorImpl. (#5828)

This commit is contained in:
Xianming Lei 2023-07-13 09:50:41 +08:00 committed by GitHub
parent 680af87377
commit 325f7e5fdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 0 deletions

View File

@ -20,6 +20,7 @@
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
@ -497,6 +498,7 @@ private class MonitoringThread extends Thread {
public void run() {
while (!stopped && !Thread.currentThread().isInterrupted()) {
long start = Time.monotonicNow();
// Print the processTrees for debugging.
if (LOG.isDebugEnabled()) {
StringBuilder tmp = new StringBuilder("[ ");
@ -587,6 +589,9 @@ public void run() {
// Save the aggregated utilization of the containers
setContainersUtilization(trackedContainersUtilization);
long duration = Time.monotonicNow() - start;
LOG.debug("Finished monitoring container cost {} ms", duration);
// Publish the container utilization metrics to node manager
// metrics system.
NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics();
@ -597,6 +602,7 @@ public void run() {
trackedContainersUtilization.getVirtualMemory());
nmMetrics.setContainerCpuUtilization(
trackedContainersUtilization.getCPU());
nmMetrics.addContainerMonitorCostTime(duration);
}
try {

View File

@ -118,6 +118,9 @@ public class NodeManagerMetrics {
@Metric("Container localization time in milliseconds")
MutableRate localizationDurationMillis;
@Metric("ContainerMonitor time cost in milliseconds")
MutableGaugeLong containersMonitorCostTime;
// CHECKSTYLE:ON:VisibilityModifier
private JvmMetrics jvmMetrics = null;
@ -481,4 +484,9 @@ public void localizationCacheHitMiss(long size) {
/**
 * Records one completed localization by adding its duration as a sample
 * to the {@code localizationDurationMillis} rate metric.
 *
 * @param downloadMillis duration of the localization, in milliseconds
 */
public void localizationComplete(long downloadMillis) {
  this.localizationDurationMillis.add(downloadMillis);
}
/**
 * Increments the {@code containersMonitorCostTime} gauge by the given
 * duration. The gauge is incremented rather than overwritten, so repeated
 * calls accumulate.
 *
 * @param duration time spent in one container-monitoring pass, in
 *                 milliseconds
 */
public void addContainerMonitorCostTime(long duration) {
  this.containersMonitorCostTime.incr(duration);
}
}

View File

@ -130,9 +130,12 @@ public void testReferenceOfSingletonJvmMetrics() {
// Update resource and check available resource again
metrics.addResource(total);
metrics.addContainerMonitorCostTime(200L);
MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
assertGauge("AvailableGB", 12, rb);
assertGauge("AvailableVCores", 19, rb);
assertGauge("ContainersMonitorCostTime", 200L, rb);
}
public static void checkMetrics(int launched, int completed, int failed,