From 325f7e5fdb4f19704847b55eb17c054f5dad134f Mon Sep 17 00:00:00 2001
From: Xianming Lei <31424839+leixm@users.noreply.github.com>
Date: Thu, 13 Jul 2023 09:50:41 +0800
Subject: [PATCH] YARN-11529 Add metrics for ContainerMonitorImpl. (#5828)

---
 .../containermanager/monitor/ContainersMonitorImpl.java   | 6 ++++++
 .../server/nodemanager/metrics/NodeManagerMetrics.java    | 8 ++++++++
 .../nodemanager/metrics/TestNodeManagerMetrics.java       | 3 +++
 3 files changed, 17 insertions(+)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index eceb7b25e4..e82fcefb6a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -20,6 +20,7 @@
 
 import org.apache.hadoop.classification.VisibleForTesting;
 import org.apache.hadoop.util.Preconditions;
+import org.apache.hadoop.util.Time;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
@@ -497,6 +498,7 @@ private class MonitoringThread extends Thread {
     public void run() {
 
       while (!stopped && !Thread.currentThread().isInterrupted()) {
+        long start = Time.monotonicNow();
         // Print the processTrees for debugging.
         if (LOG.isDebugEnabled()) {
           StringBuilder tmp = new StringBuilder("[ ");
@@ -587,6 +589,9 @@ public void run() {
         // Save the aggregated utilization of the containers
         setContainersUtilization(trackedContainersUtilization);
 
+        long duration = Time.monotonicNow() - start;
+        LOG.debug("Finished monitoring container cost {} ms", duration);
+
         // Publish the container utilization metrics to node manager
         // metrics system.
         NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics();
@@ -597,6 +602,7 @@ public void run() {
               trackedContainersUtilization.getVirtualMemory());
           nmMetrics.setContainerCpuUtilization(
               trackedContainersUtilization.getCPU());
+          nmMetrics.addContainerMonitorCostTime(duration);
         }
 
         try {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index 775196f582..86c67f74d7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -118,6 +118,9 @@ public class NodeManagerMetrics {
   @Metric("Container localization time in milliseconds")
   MutableRate localizationDurationMillis;
 
+  @Metric("ContainerMonitor time cost in milliseconds")
+  MutableGaugeLong containersMonitorCostTime;
+
   // CHECKSTYLE:ON:VisibilityModifier
 
   private JvmMetrics jvmMetrics = null;
@@ -481,4 +484,9 @@ public void localizationCacheHitMiss(long size) {
   public void localizationComplete(long downloadMillis) {
     localizationDurationMillis.add(downloadMillis);
   }
+
+  public void addContainerMonitorCostTime(long duration) {
+    containersMonitorCostTime.incr(duration);
+  }
+
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
index 8421666515..a20f27d15a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
@@ -130,9 +130,12 @@ public void testReferenceOfSingletonJvmMetrics() {
 
     // Update resource and check available resource again
     metrics.addResource(total);
+    metrics.addContainerMonitorCostTime(200L);
+
     MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
     assertGauge("AvailableGB", 12, rb);
     assertGauge("AvailableVCores", 19, rb);
+    assertGauge("ContainersMonitorCostTime", 200L, rb);
   }
 
   public static void checkMetrics(int launched, int completed, int failed,